From 7b306ebeaca900f7a45366f5a2f793c1018a51b3 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 18 Dec 2022 09:45:56 +0100 Subject: [PATCH] [lhe] ** COMPLETE LHE PART 4 (RANDOM COLOR) ** rerun 15 tmad and 60 tput alltees - finally all ok This completes the random color choice #402 This took around 8 hours from 1h to 9h STARTED AT Sun Dec 18 07:32:03 CET 2022 ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttg -ggttgg -ggttggg -makeclean ENDED(1) AT Sun Dec 18 08:39:14 CET 2022 [Status=0] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean ENDED(2) AT Sun Dec 18 09:00:20 CET 2022 [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -ggttgg -ggttggg -flt -bridge -makeclean ENDED(3) AT Sun Dec 18 09:09:51 CET 2022 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ENDED(4) AT Sun Dec 18 09:13:28 CET 2022 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst ENDED(5) AT Sun Dec 18 09:17:01 CET 2022 [Status=0] --- .../log_eemumu_mad_d_inl0_hrd0.txt | 132 +-- .../log_eemumu_mad_f_inl0_hrd0.txt | 130 +-- .../log_eemumu_mad_m_inl0_hrd0.txt | 134 +-- .../log_ggtt_mad_d_inl0_hrd0.txt | 130 +-- .../log_ggtt_mad_f_inl0_hrd0.txt | 132 +-- .../log_ggtt_mad_m_inl0_hrd0.txt | 134 +-- .../log_ggttg_mad_d_inl0_hrd0.txt | 136 +-- .../log_ggttg_mad_f_inl0_hrd0.txt | 128 +-- .../log_ggttg_mad_m_inl0_hrd0.txt | 130 +-- .../log_ggttgg_mad_d_inl0_hrd0.txt | 899 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 899 +++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 897 ++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 136 +-- .../log_ggttggg_mad_f_inl0_hrd0.txt | 136 +-- .../log_ggttggg_mad_m_inl0_hrd0.txt | 134 +-- .../log_eemumu_mad_d_inl0_hrd0.txt | 86 +- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 86 +- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 86 +- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 86 +- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 86 +- .../log_eemumu_mad_d_inl0_hrd1.txt | 86 +- .../log_eemumu_mad_d_inl1_hrd0.txt | 90 +- .../log_eemumu_mad_d_inl1_hrd1.txt | 90 +- .../log_eemumu_mad_f_inl0_hrd0.txt | 86 +- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 86 +- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 86 +- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 86 +- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 86 +- .../log_eemumu_mad_f_inl0_hrd1.txt | 86 +- .../log_eemumu_mad_f_inl1_hrd0.txt | 88 +- .../log_eemumu_mad_f_inl1_hrd1.txt | 88 +- .../log_ggtt_mad_d_inl0_hrd0.txt | 94 +- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 94 +- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 94 +- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 94 +- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 94 +- .../log_ggtt_mad_d_inl0_hrd1.txt | 94 +- .../log_ggtt_mad_d_inl1_hrd0.txt | 96 +- .../log_ggtt_mad_d_inl1_hrd1.txt | 96 +- .../log_ggtt_mad_f_inl0_hrd0.txt | 94 +- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 94 +- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 94 +- .../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 94 +- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 94 +- .../log_ggtt_mad_f_inl0_hrd1.txt | 94 +- .../log_ggtt_mad_f_inl1_hrd0.txt | 96 +- .../log_ggtt_mad_f_inl1_hrd1.txt | 96 +- .../log_ggttg_mad_d_inl0_hrd0.txt | 110 +-- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 110 +-- .../log_ggttg_mad_d_inl0_hrd1.txt | 110 +-- .../log_ggttg_mad_f_inl0_hrd0.txt | 110 +-- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 110 +-- .../log_ggttg_mad_f_inl0_hrd1.txt | 110 +-- .../log_ggttgg_mad_d_inl0_hrd0.txt | 108 +-- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 108 +-- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 108 +-- .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 108 +-- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 108 +-- .../log_ggttgg_mad_d_inl0_hrd1.txt | 108 +-- .../log_ggttgg_mad_d_inl1_hrd0.txt | 110 +-- .../log_ggttgg_mad_d_inl1_hrd1.txt | 108 +-- .../log_ggttgg_mad_f_inl0_hrd0.txt | 108 +-- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 108 +-- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 108 +-- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 108 +-- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 108 +-- .../log_ggttgg_mad_f_inl0_hrd1.txt | 108 +-- .../log_ggttgg_mad_f_inl1_hrd0.txt | 114 +-- .../log_ggttgg_mad_f_inl1_hrd1.txt | 108 +-- .../log_ggttggg_mad_d_inl0_hrd0.txt | 108 +-- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 108 +-- .../log_ggttggg_mad_d_inl0_hrd1.txt | 108 +-- .../log_ggttggg_mad_f_inl0_hrd0.txt | 108 +-- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 108 +-- .../log_ggttggg_mad_f_inl0_hrd1.txt | 108 +-- 75 files changed, 5159 insertions(+), 5072 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index bba2ce0a40..7dc8bc66cd 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -2,24 +2,24 @@ Working directory (build): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/e CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 + make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' @@ -31,7 +31,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_11:21:28 +DATE: 2022-12-18_02:31:05 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll @@ -55,9 +55,9 @@ Executing ' ./madevent < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/o [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 0.1336s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1204s - [COUNTERS] Fortran MEs ( 1 ) : 0.0133s for 8192 events => throughput is 6.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.0737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.0604s + [COUNTERS] Fortran MEs ( 1 ) : 0.0133s for 8192 events => throughput is 6.18E+05 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/o [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2135s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2003s - [COUNTERS] Fortran MEs ( 1 ) : 0.0132s for 8192 events => throughput is 6.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2155s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2023s + [COUNTERS] Fortran MEs ( 1 ) : 0.0132s for 8192 events => throughput is 6.19E+05 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.5651s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4225s - [COUNTERS] Fortran MEs ( 1 ) : 0.1426s for 90112 events => throughput is 6.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5657s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4229s + [COUNTERS] Fortran MEs ( 1 ) : 0.1429s for 90112 events => throughput is 6.31E+05 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,8 +127,8 @@ Executing ' ./build.none_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681771] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2495s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2429s + [COUNTERS] PROGRAM TOTAL : 0.2791s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2725s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -159,9 +159,9 @@ Executing ' ./build.none_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.5214s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4490s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0724s for 90112 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.5073s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4349s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0724s for 90112 events => throughput is 1.25E+06 events/s *** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -174,12 +174,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.283677e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.287695e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.310445e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.311116e+06 ) sec^-1 *** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -201,9 +201,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681771] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2528s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2496s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.55E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2327s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2295s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.58E+06 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -233,9 +233,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4643s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4293s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0350s for 90112 events => throughput is 2.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4638s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4285s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0353s for 90112 events => throughput is 2.56E+06 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -248,12 +248,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.675659e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.680928e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.843466e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.848295e+06 ) sec^-1 *** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -275,9 +275,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681773] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2342s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2323s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2469s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2450s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.25E+06 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -307,9 +307,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813669E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4531s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4317s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 90112 events => throughput is 4.21E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4525s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4313s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0212s for 90112 events => throughput is 4.25E+06 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -322,12 +322,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.582630e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.528834e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.092469e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.063451e+06 ) sec^-1 *** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -349,9 +349,9 @@ Executing ' ./build.512y_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681773] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.3626s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3608s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.42E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2537s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2519s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.43E+06 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -381,9 +381,9 @@ Executing ' ./build.512y_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813669E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4516s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4311s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 90112 events => throughput is 4.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4501s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4297s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 90112 events => throughput is 4.42E+06 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -396,12 +396,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.873832e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.851509e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.343251e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.368892e+06 ) sec^-1 *** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -423,9 +423,9 @@ Executing ' ./build.512z_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681771] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2319s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2298s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2190s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.07E+06 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -455,9 +455,9 @@ Executing ' ./build.512z_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813669E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4577s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4353s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 90112 events => throughput is 4.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4567s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4348s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0219s for 90112 events => throughput is 4.12E+06 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -470,12 +470,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.622889e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.550924e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.196571e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.133836e+06 ) sec^-1 *** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -497,9 +497,9 @@ Executing ' ./build.none_d_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681771] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.6920s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6915s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.58E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6704s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6699s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.62E+07 events/s *** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -529,9 +529,9 @@ Executing ' ./build.none_d_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.8763s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 90112 events => throughput is 1.76E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8753s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8703s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.80E+07 events/s *** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -544,41 +544,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.713526e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.737638e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.287565e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.416377e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.205238e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.176780e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.030110e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.031050e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.196570e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.147165e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.078085e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078296e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.173429e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.140802e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.004450e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.012168e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 69fcf82b04..de68058270 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -4,34 +4,34 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_11:21:46 +DATE: 2022-12-18_02:31:23 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll @@ -56,8 +56,8 @@ Executing ' ./madevent < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/o [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [] fbridge_mode=0 [COUNTERS] PROGRAM TOTAL : 0.0384s - [COUNTERS] Fortran Overhead ( 0 ) : 0.0252s - [COUNTERS] Fortran MEs ( 1 ) : 0.0133s for 8192 events => throughput is 6.17E+05 events/s + [COUNTERS] Fortran Overhead ( 0 ) : 0.0251s + [COUNTERS] Fortran MEs ( 1 ) : 0.0133s for 8192 events => throughput is 6.16E+05 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/o [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2130s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1997s - [COUNTERS] Fortran MEs ( 1 ) : 0.0133s for 8192 events => throughput is 6.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2143s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2011s + [COUNTERS] Fortran MEs ( 1 ) : 0.0132s for 8192 events => throughput is 6.19E+05 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.5663s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4235s - [COUNTERS] Fortran MEs ( 1 ) : 0.1428s for 90112 events => throughput is 6.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5648s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4218s + [COUNTERS] Fortran MEs ( 1 ) : 0.1429s for 90112 events => throughput is 6.30E+05 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,8 +127,8 @@ Executing ' ./build.none_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166140620297] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2433s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2368s + [COUNTERS] PROGRAM TOTAL : 0.2232s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2167s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 8192 events => throughput is 1.26E+06 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -159,9 +159,9 @@ Executing ' ./build.none_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501907784661551E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.5017s + [COUNTERS] PROGRAM TOTAL : 0.5016s [COUNTERS] Fortran Overhead ( 0 ) : 0.4298s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0720s for 90112 events => throughput is 1.25E+06 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0718s for 90112 events => throughput is 1.26E+06 events/s *** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -174,12 +174,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.314078e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.314575e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.326908e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.327429e+06 ) sec^-1 *** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -201,9 +201,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165549479658] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2214s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2197s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2732s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2714s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.77E+06 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -233,9 +233,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905692857918E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4433s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4244s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0189s for 90112 events => throughput is 4.76E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4430s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4241s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0189s for 90112 events => throughput is 4.77E+06 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -248,12 +248,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.132537e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.138251e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.521734e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.477573e+06 ) sec^-1 *** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -275,9 +275,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165569099930] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5760s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5749s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0011s for 8192 events => throughput is 7.73E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2316s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2305s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 7.82E+06 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -307,9 +307,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905658047333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4423s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4307s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 90112 events => throughput is 7.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4443s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4327s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0117s for 90112 events => throughput is 7.73E+06 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -322,12 +322,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.099573e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.255776e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.037883e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.025894e+07 ) sec^-1 *** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -349,9 +349,9 @@ Executing ' ./build.512y_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165569099930] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2277s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2267s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 8.25E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2426s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2416s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 8.16E+06 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -381,9 +381,9 @@ Executing ' ./build.512y_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905658047333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4461s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4350s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 90112 events => throughput is 8.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4405s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4294s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 90112 events => throughput is 8.12E+06 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -396,12 +396,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.339902e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.610286e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.064303e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092222e+07 ) sec^-1 *** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -423,9 +423,9 @@ Executing ' ./build.512z_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166431914253] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2436s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2424s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0011s for 8192 events => throughput is 7.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2435s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2423s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0012s for 8192 events => throughput is 7.11E+06 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -455,9 +455,9 @@ Executing ' ./build.512z_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501909358591482E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4468s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4341s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0127s for 90112 events => throughput is 7.11E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4457s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4329s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 90112 events => throughput is 7.04E+06 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -470,12 +470,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.584735e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.907088e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.017122e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.042026e+07 ) sec^-1 *** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -497,9 +497,9 @@ Executing ' ./build.none_f_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166796068879] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.6443s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.66E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6473s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6468s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.69E+07 events/s *** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -529,9 +529,9 @@ Executing ' ./build.none_f_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501910316213006E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.8747s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8699s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.87E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7682s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7633s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.84E+07 events/s *** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -544,41 +544,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.997618e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.085378e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.041848e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.923417e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.672678e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.045504e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.493712e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.490691e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.664854e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.254689e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.760907e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.746790e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.644663e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.212493e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.452592e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.459789e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 0f25dbe34f..82a543401c 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -5,33 +5,33 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_11:22:05 +DATE: 2022-12-18_02:31:43 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll @@ -55,9 +55,9 @@ Executing ' ./madevent < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/o [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 0.0388s - [COUNTERS] Fortran Overhead ( 0 ) : 0.0254s - [COUNTERS] Fortran MEs ( 1 ) : 0.0134s for 8192 events => throughput is 6.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.0382s + [COUNTERS] Fortran Overhead ( 0 ) : 0.0249s + [COUNTERS] Fortran MEs ( 1 ) : 0.0133s for 8192 events => throughput is 6.17E+05 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/o [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2140s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2007s - [COUNTERS] Fortran MEs ( 1 ) : 0.0133s for 8192 events => throughput is 6.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2147s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2015s + [COUNTERS] Fortran MEs ( 1 ) : 0.0132s for 8192 events => throughput is 6.19E+05 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.5650s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4222s - [COUNTERS] Fortran MEs ( 1 ) : 0.1427s for 90112 events => throughput is 6.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5643s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4211s + [COUNTERS] Fortran MEs ( 1 ) : 0.1431s for 90112 events => throughput is 6.30E+05 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,8 +127,8 @@ Executing ' ./build.none_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211734] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2377s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2310s + [COUNTERS] PROGRAM TOTAL : 0.2472s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2405s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.22E+06 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -159,9 +159,9 @@ Executing ' ./build.none_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.5103s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4363s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0740s for 90112 events => throughput is 1.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.5163s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4424s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0739s for 90112 events => throughput is 1.22E+06 events/s *** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -174,12 +174,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.253940e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.261846e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.283739e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.285736e+06 ) sec^-1 *** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -201,9 +201,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211734] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2408s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2376s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.58E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2375s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2344s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -233,9 +233,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4670s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4318s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0352s for 90112 events => throughput is 2.56E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4649s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4301s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0348s for 90112 events => throughput is 2.59E+06 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -248,12 +248,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.687554e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.691031e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.828697e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.853583e+06 ) sec^-1 *** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -275,9 +275,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975944] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2101s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2082s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2296s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2277s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.28E+06 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -307,9 +307,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4528s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4315s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 90112 events => throughput is 4.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4508s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4297s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 90112 events => throughput is 4.28E+06 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -322,12 +322,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.655231e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.640653e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.100205e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.171042e+06 ) sec^-1 *** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -349,9 +349,9 @@ Executing ' ./build.512y_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975944] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2211s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2193s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2271s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2253s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.52E+06 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -381,9 +381,9 @@ Executing ' ./build.512y_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4499s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4299s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0200s for 90112 events => throughput is 4.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4544s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4345s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0199s for 90112 events => throughput is 4.54E+06 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -396,12 +396,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.934402e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.844651e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.472117e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.480255e+06 ) sec^-1 *** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -423,9 +423,9 @@ Executing ' ./build.512z_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975944] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2105s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2085s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2555s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.12E+06 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -455,9 +455,9 @@ Executing ' ./build.512z_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4534s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4319s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0215s for 90112 events => throughput is 4.19E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4537s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4322s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0215s for 90112 events => throughput is 4.18E+06 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -470,12 +470,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.529561e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.498610e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.087153e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.071055e+06 ) sec^-1 *** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -497,9 +497,9 @@ Executing ' ./build.none_m_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169066587266] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.6463s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6458s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.61E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6448s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6443s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s *** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -529,8 +529,8 @@ Executing ' ./build.none_m_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_eem [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919911173582E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.8802s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8751s + [COUNTERS] PROGRAM TOTAL : 0.8761s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8709s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 90112 events => throughput is 1.74E+07 events/s *** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -544,41 +544,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.738590e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.718262e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.399250e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.295707e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.178327e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.187394e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.023566e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.028393e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.200231e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.201698e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.080151e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.026546e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.195910e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.194005e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.997807e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.998695e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index e7a1f6e36e..7e431bbcae 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -10,10 +10,9 @@ make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -23,15 +22,16 @@ make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_11:22:23 +DATE: 2022-12-18_02:32:02 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -55,9 +55,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/out [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 0.2689s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2062s - [COUNTERS] Fortran MEs ( 1 ) : 0.0627s for 8192 events => throughput is 1.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2547s + [COUNTERS] Fortran MEs ( 1 ) : 0.0623s for 8192 events => throughput is 1.31E+05 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/out [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600123] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.4021s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3394s - [COUNTERS] Fortran MEs ( 1 ) : 0.0627s for 8192 events => throughput is 1.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4002s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3379s + [COUNTERS] Fortran MEs ( 1 ) : 0.0624s for 8192 events => throughput is 1.31E+05 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/ou [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775350] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 2.1662s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4826s - [COUNTERS] Fortran MEs ( 1 ) : 0.6835s for 90112 events => throughput is 1.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1675s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4844s + [COUNTERS] Fortran MEs ( 1 ) : 0.6830s for 90112 events => throughput is 1.32E+05 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,8 +127,8 @@ Executing ' ./build.none_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.4316s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3840s + [COUNTERS] PROGRAM TOTAL : 0.4625s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4150s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0475s for 8192 events => throughput is 1.72E+05 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -159,9 +159,9 @@ Executing ' ./build.none_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775372] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 2.0479s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5245s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5233s for 90112 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0512s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5282s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5231s for 90112 events => throughput is 1.72E+05 events/s *** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -174,12 +174,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.757740e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.754658e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.758485e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.750367e+05 ) sec^-1 *** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -201,9 +201,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3882s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3627s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 8192 events => throughput is 3.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4246s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3992s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 8192 events => throughput is 3.23E+05 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -233,8 +233,8 @@ Executing ' ./build.sse4_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7849s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5055s + [COUNTERS] PROGRAM TOTAL : 1.7958s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5164s [COUNTERS] CudaCpp MEs ( 2 ) : 0.2794s for 90112 events => throughput is 3.23E+05 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -248,12 +248,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.282987e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.285773e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.299639e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.288021e+05 ) sec^-1 *** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -275,9 +275,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3693s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3536s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5584s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5427s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.20E+05 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -307,9 +307,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6734s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4993s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1742s for 90112 events => throughput is 5.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7190s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5448s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1741s for 90112 events => throughput is 5.18E+05 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -322,12 +322,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.289918e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.335989e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.394293e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.397120e+05 ) sec^-1 *** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -349,9 +349,9 @@ Executing ' ./build.512y_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3658s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3515s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4016s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3872s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0143s for 8192 events => throughput is 5.71E+05 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -381,9 +381,9 @@ Executing ' ./build.512y_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6590s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5005s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1585s for 90112 events => throughput is 5.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6527s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4946s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1581s for 90112 events => throughput is 5.70E+05 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -396,12 +396,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.878944e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.855410e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.923342e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.974100e+05 ) sec^-1 *** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -423,8 +423,8 @@ Executing ' ./build.512z_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3818s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3596s + [COUNTERS] PROGRAM TOTAL : 2.1574s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1352s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.70E+05 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -455,9 +455,9 @@ Executing ' ./build.512z_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7518s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5078s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2441s for 90112 events => throughput is 3.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7481s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5037s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2444s for 90112 events => throughput is 3.69E+05 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -470,12 +470,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.771863e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.763945e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.807860e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.789715e+05 ) sec^-1 *** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -497,9 +497,9 @@ Executing ' ./build.none_d_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.7841s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7835s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.41E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7839s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7833s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.40E+07 events/s *** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -529,9 +529,9 @@ Executing ' ./build.none_d_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.9348s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9281s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 90112 events => throughput is 1.34E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9337s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9269s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.31E+07 events/s *** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -544,41 +544,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.860157e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.852631e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.537492e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.502159e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.636963e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.633729e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.079315e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.072643e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.639752e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.635731e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.154898e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.155819e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.630968e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.635002e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.037682e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.047036e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 14f4b32cbe..15c1c18b8b 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -4,19 +4,16 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for `all'. -make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -30,8 +27,11 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for `all'. +make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_11:22:51 +DATE: 2022-12-18_02:32:33 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -55,9 +55,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/out [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 0.2614s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1991s - [COUNTERS] Fortran MEs ( 1 ) : 0.0623s for 8192 events => throughput is 1.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2610s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1981s + [COUNTERS] Fortran MEs ( 1 ) : 0.0628s for 8192 events => throughput is 1.30E+05 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/out [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600123] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.4091s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3466s - [COUNTERS] Fortran MEs ( 1 ) : 0.0625s for 8192 events => throughput is 1.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4015s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3382s + [COUNTERS] Fortran MEs ( 1 ) : 0.0633s for 8192 events => throughput is 1.29E+05 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/ou [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775350] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 2.1704s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4873s - [COUNTERS] Fortran MEs ( 1 ) : 0.6831s for 90112 events => throughput is 1.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1669s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4833s + [COUNTERS] Fortran MEs ( 1 ) : 0.6836s for 90112 events => throughput is 1.32E+05 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,8 +127,8 @@ Executing ' ./build.none_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690706211693566] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.4289s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3827s + [COUNTERS] PROGRAM TOTAL : 0.4535s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0461s for 8192 events => throughput is 1.78E+05 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -159,9 +159,9 @@ Executing ' ./build.none_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782418787799] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 2.0387s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5303s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5085s for 90112 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0467s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5383s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5084s for 90112 events => throughput is 1.77E+05 events/s *** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -174,12 +174,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.815516e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821515e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.821174e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821126e+05 ) sec^-1 *** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -201,8 +201,8 @@ Executing ' ./build.sse4_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690707641465352] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3716s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3541s + [COUNTERS] PROGRAM TOTAL : 0.3919s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3744s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0175s for 8192 events => throughput is 4.69E+05 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -233,9 +233,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223786452345522] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6935s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4960s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1975s for 90112 events => throughput is 4.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6848s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4925s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1923s for 90112 events => throughput is 4.69E+05 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -248,12 +248,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.746334e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.763916e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.771174e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.795980e+05 ) sec^-1 *** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -275,9 +275,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690698819656788] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3547s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3461s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0086s for 8192 events => throughput is 9.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3693s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3607s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0086s for 8192 events => throughput is 9.52E+05 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -307,9 +307,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782736292968] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.5884s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4931s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0953s for 90112 events => throughput is 9.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5859s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4909s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0950s for 90112 events => throughput is 9.48E+05 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -322,12 +322,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.907975e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.866762e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.994657e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000932e+06 ) sec^-1 *** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -349,8 +349,8 @@ Executing ' ./build.512y_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690698819656788] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3535s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3454s + [COUNTERS] PROGRAM TOTAL : 0.3829s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3748s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -381,9 +381,9 @@ Executing ' ./build.512y_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782736292968] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.5794s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4898s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0896s for 90112 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.5804s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4911s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0893s for 90112 events => throughput is 1.01E+06 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -396,12 +396,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.052079e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.055177e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.071890e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.063184e+06 ) sec^-1 *** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -423,9 +423,9 @@ Executing ' ./build.512z_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690703490151122] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3609s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3497s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4166s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4054s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.32E+05 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -455,9 +455,9 @@ Executing ' ./build.512z_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223787021597488] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6180s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4949s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1231s for 90112 events => throughput is 7.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6131s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4902s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1230s for 90112 events => throughput is 7.33E+05 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -470,12 +470,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.551019e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.573890e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.639612e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.700410e+05 ) sec^-1 *** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -497,9 +497,9 @@ Executing ' ./build.none_f_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690703397697973] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.7857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7851s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.52E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7834s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s *** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -529,9 +529,9 @@ Executing ' ./build.none_f_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223786763175973] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.9343s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9286s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 90112 events => throughput is 1.58E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9304s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9249s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 90112 events => throughput is 1.61E+07 events/s *** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -544,41 +544,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.134782e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.015579e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.735228e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.695019e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.008700e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.950149e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.442695e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.467371e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.853045e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.973285e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.555924e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.559751e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.000518e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.898971e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.203486e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204246e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 84f4d4912a..4843bc0e6f 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -2,36 +2,36 @@ Working directory (build): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' + make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_11:23:19 +DATE: 2022-12-18_02:33:00 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -55,8 +55,8 @@ Executing ' ./madevent < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/out [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 0.2608s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1981s + [COUNTERS] PROGRAM TOTAL : 0.2625s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1999s [COUNTERS] Fortran MEs ( 1 ) : 0.0627s for 8192 events => throughput is 1.31E+05 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/out [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600123] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3996s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3372s - [COUNTERS] Fortran MEs ( 1 ) : 0.0624s for 8192 events => throughput is 1.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4011s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3384s + [COUNTERS] Fortran MEs ( 1 ) : 0.0627s for 8192 events => throughput is 1.31E+05 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,8 +103,8 @@ Executing ' ./madevent < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/ou [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775350] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 2.1633s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4800s + [COUNTERS] PROGRAM TOTAL : 2.1680s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4848s [COUNTERS] Fortran MEs ( 1 ) : 0.6832s for 90112 events => throughput is 1.32E+05 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** @@ -127,9 +127,9 @@ Executing ' ./build.none_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032019] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.4330s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3847s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0483s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4772s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4290s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0482s for 8192 events => throughput is 1.70E+05 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -159,9 +159,9 @@ Executing ' ./build.none_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280974] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 2.0646s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5335s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5311s for 90112 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0566s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5260s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5307s for 90112 events => throughput is 1.70E+05 events/s *** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -174,12 +174,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.733611e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.738183e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.745152e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.744532e+05 ) sec^-1 *** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -201,9 +201,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032019] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3871s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3620s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0250s for 8192 events => throughput is 3.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4253s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4003s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0250s for 8192 events => throughput is 3.28E+05 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -233,9 +233,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280981] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7838s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5070s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2768s for 90112 events => throughput is 3.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7832s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2753s for 90112 events => throughput is 3.27E+05 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -248,12 +248,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.307335e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.318764e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.345368e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.347341e+05 ) sec^-1 *** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -275,9 +275,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3679s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3522s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4186s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4030s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.27E+05 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -307,9 +307,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032026] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6700s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4986s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1715s for 90112 events => throughput is 5.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6659s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4948s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1711s for 90112 events => throughput is 5.27E+05 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -322,12 +322,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.380102e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.390414e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.479554e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.490755e+05 ) sec^-1 *** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -349,9 +349,9 @@ Executing ' ./build.512y_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3667s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3526s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3768s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3627s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0141s for 8192 events => throughput is 5.80E+05 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -381,9 +381,9 @@ Executing ' ./build.512y_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032026] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6529s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4974s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1555s for 90112 events => throughput is 5.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6594s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5028s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1566s for 90112 events => throughput is 5.75E+05 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -396,12 +396,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.911402e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.942503e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.058231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.065398e+05 ) sec^-1 *** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -423,9 +423,9 @@ Executing ' ./build.512z_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3920s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3819s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3600s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0219s for 8192 events => throughput is 3.74E+05 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -455,9 +455,9 @@ Executing ' ./build.512z_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032026] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7546s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5138s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2408s for 90112 events => throughput is 3.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7472s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5067s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2404s for 90112 events => throughput is 3.75E+05 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -470,12 +470,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.809603e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.757314e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.839123e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.848311e+05 ) sec^-1 *** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -497,9 +497,9 @@ Executing ' ./build.none_m_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708266690699] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.7842s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7836s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.35E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7810s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7804s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.41E+07 events/s *** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -529,9 +529,9 @@ Executing ' ./build.none_m_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782303744777] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.9374s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9306s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 90112 events => throughput is 1.33E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9327s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9260s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 90112 events => throughput is 1.35E+07 events/s *** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -544,41 +544,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.853310e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.836257e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.546115e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.514976e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.596835e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.646508e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.064249e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056425e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.628007e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.617318e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.133958e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.135919e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.609516e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.631126e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.972453e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.007888e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 413f49ef3a..391bd68e4f 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -2,36 +2,36 @@ Working directory (build): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2022-12-17_11:23:48 +DATE: 2022-12-18_02:33:30 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -56,8 +56,8 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/ou [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [] fbridge_mode=0 [COUNTERS] PROGRAM TOTAL : 0.6228s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2215s - [COUNTERS] Fortran MEs ( 1 ) : 0.4013s for 8192 events => throughput is 2.04E+04 events/s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2217s + [COUNTERS] Fortran MEs ( 1 ) : 0.4010s for 8192 events => throughput is 2.04E+04 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/ou [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195085723454797E-002] fbridge_mode=0 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.7184s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3168s - [COUNTERS] Fortran MEs ( 1 ) : 0.4015s for 8192 events => throughput is 2.04E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7167s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3155s + [COUNTERS] Fortran MEs ( 1 ) : 0.4012s for 8192 events => throughput is 2.04E+04 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/o [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311088744870252E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 6.1307s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7197s - [COUNTERS] Fortran MEs ( 1 ) : 4.4110s for 90112 events => throughput is 2.04E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.1208s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7148s + [COUNTERS] Fortran MEs ( 1 ) : 4.4060s for 90112 events => throughput is 2.05E+04 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,9 +127,9 @@ Executing ' ./build.none_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195085723454769E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 1.0480s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6803s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3678s for 8192 events => throughput is 2.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0462s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6781s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3681s for 8192 events => throughput is 2.23E+04 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -159,9 +159,9 @@ Executing ' ./build.none_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311088744870294E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 6.1647s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1196s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.0451s for 90112 events => throughput is 2.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.1421s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1015s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.0406s for 90112 events => throughput is 2.23E+04 events/s *** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -174,12 +174,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.299163e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.304045e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.303108e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.305942e+04 ) sec^-1 *** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -201,9 +201,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195085723454769E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.7022s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5093s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1929s for 8192 events => throughput is 4.25E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7006s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1927s for 8192 events => throughput is 4.25E+04 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -233,9 +233,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311088744870294E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.0530s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9313s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.1217s for 90112 events => throughput is 4.25E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.0455s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9248s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.1207s for 90112 events => throughput is 4.25E+04 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -248,12 +248,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.383224e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.383316e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.389413e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.390165e+04 ) sec^-1 *** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -275,9 +275,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195085723454769E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.5217s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4208s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1009s for 8192 events => throughput is 8.12E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5229s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4222s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1006s for 8192 events => throughput is 8.14E+04 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -307,9 +307,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311088744870252E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.9481s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8386s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1094s for 90112 events => throughput is 8.12E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.9580s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8478s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1102s for 90112 events => throughput is 8.12E+04 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -322,12 +322,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.278079e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.353201e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.370684e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.369470e+04 ) sec^-1 *** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -349,9 +349,9 @@ Executing ' ./build.512y_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195085723454769E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.4984s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4086s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0897s for 8192 events => throughput is 9.13E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4969s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4073s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0896s for 8192 events => throughput is 9.14E+04 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -381,9 +381,9 @@ Executing ' ./build.512y_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311088744870252E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.8269s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8389s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9880s for 90112 events => throughput is 9.12E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.8126s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8257s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9869s for 90112 events => throughput is 9.13E+04 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -396,12 +396,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.405026e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.426770e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.426980e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.460142e+04 ) sec^-1 *** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -423,9 +423,9 @@ Executing ' ./build.512z_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195085723454769E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.5591s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4408s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1183s for 8192 events => throughput is 6.92E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4382s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1182s for 8192 events => throughput is 6.93E+04 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -455,9 +455,9 @@ Executing ' ./build.512z_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311088744870252E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.1616s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8615s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3001s for 90112 events => throughput is 6.93E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.2037s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9014s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3023s for 90112 events => throughput is 6.92E+04 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -470,12 +470,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.018936e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.999180e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.025087e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.022661e+04 ) sec^-1 *** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -497,9 +497,9 @@ Executing ' ./build.none_d_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195085723454755E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.7677s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7659s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.68E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7674s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7657s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.67E+06 events/s *** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -529,9 +529,9 @@ Executing ' ./build.none_d_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311088744870322E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2001s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 90112 events => throughput is 4.54E+06 events/s + [COUNTERS] PROGRAM TOTAL : 2.1996s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0197s for 90112 events => throughput is 4.58E+06 events/s *** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -544,41 +544,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.926562e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.992617e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.963762e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.971997e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.006592e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.015527e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.214534e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.213107e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.017828e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.025792e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.221420e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.220622e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.017552e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.011223e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.756511e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.749526e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index f31713933c..03b06ceeed 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -10,28 +10,28 @@ make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2022-12-17_11:24:36 +DATE: 2022-12-18_02:34:18 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -55,9 +55,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/ou [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 0.6215s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2204s - [COUNTERS] Fortran MEs ( 1 ) : 0.4012s for 8192 events => throughput is 2.04E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6225s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2214s + [COUNTERS] Fortran MEs ( 1 ) : 0.4011s for 8192 events => throughput is 2.04E+04 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/ou [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195085723454797E-002] fbridge_mode=0 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.7160s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3147s - [COUNTERS] Fortran MEs ( 1 ) : 0.4013s for 8192 events => throughput is 2.04E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7326s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3317s + [COUNTERS] Fortran MEs ( 1 ) : 0.4009s for 8192 events => throughput is 2.04E+04 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/o [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311088744870252E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 6.1223s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7135s - [COUNTERS] Fortran MEs ( 1 ) : 4.4088s for 90112 events => throughput is 2.04E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.1230s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7169s + [COUNTERS] Fortran MEs ( 1 ) : 4.4060s for 90112 events => throughput is 2.05E+04 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,9 +127,9 @@ Executing ' ./build.none_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195077561096507E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 1.0301s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6692s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3608s for 8192 events => throughput is 2.27E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0282s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6676s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3606s for 8192 events => throughput is 2.27E+04 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -159,9 +159,9 @@ Executing ' ./build.none_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311077331566381E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 6.0655s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0940s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.9714s for 90112 events => throughput is 2.27E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.0552s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0868s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.9684s for 90112 events => throughput is 2.27E+04 events/s *** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -174,12 +174,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.351718e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.351722e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.352378e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.353707e+04 ) sec^-1 *** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -201,9 +201,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195053770617948E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.5293s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4233s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1061s for 8192 events => throughput is 7.72E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5273s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4214s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1059s for 8192 events => throughput is 7.74E+04 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -233,9 +233,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311070749451528E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.0061s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8404s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1657s for 90112 events => throughput is 7.73E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.0047s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8395s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1652s for 90112 events => throughput is 7.73E+04 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -248,12 +248,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.928457e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.928741e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.937420e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.951011e+04 ) sec^-1 *** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -307,9 +307,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311067173078164E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3378s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7814s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5564s for 90112 events => throughput is 1.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3384s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7829s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5555s for 90112 events => throughput is 1.62E+05 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -322,12 +322,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.662305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.644320e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.662537e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.670558e+05 ) sec^-1 *** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -349,9 +349,9 @@ Executing ' ./build.512y_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195081501783867E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.4101s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3645s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0455s for 8192 events => throughput is 1.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4080s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3625s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0454s for 8192 events => throughput is 1.80E+05 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -381,9 +381,9 @@ Executing ' ./build.512y_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311067173078164E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2833s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7817s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5016s for 90112 events => throughput is 1.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2746s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7738s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5008s for 90112 events => throughput is 1.80E+05 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -396,12 +396,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.868291e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.867715e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.871598e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.870742e+05 ) sec^-1 *** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -423,9 +423,9 @@ Executing ' ./build.512z_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195093891816589E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.4330s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0576s for 8192 events => throughput is 1.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4323s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3748s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0575s for 8192 events => throughput is 1.42E+05 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -455,9 +455,9 @@ Executing ' ./build.512z_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311077675762011E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.4273s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7942s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6331s for 90112 events => throughput is 1.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4231s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7909s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6322s for 90112 events => throughput is 1.43E+05 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -470,12 +470,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.448418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.449587e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.455820e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.452380e+05 ) sec^-1 *** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -497,9 +497,9 @@ Executing ' ./build.none_f_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195077245643899E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.7649s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7640s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.58E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7618s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7609s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.45E+06 events/s *** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -529,9 +529,9 @@ Executing ' ./build.none_f_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311081628461637E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.1905s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1807s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 90112 events => throughput is 9.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 2.1798s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1701s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0097s for 90112 events => throughput is 9.27E+06 events/s *** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -544,41 +544,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.236571e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.248693e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.840990e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.836475e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.425326e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.439126e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.468311e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.397170e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.452096e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.438818e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.547330e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.499386e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.379589e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.378173e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.616182e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.611722e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index dd13a9f3cf..c53c469adc 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -4,34 +4,34 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2022-12-17_11:25:18 +DATE: 2022-12-18_02:35:00 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -55,8 +55,8 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/ou [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 0.6222s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2208s + [COUNTERS] PROGRAM TOTAL : 0.6211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2196s [COUNTERS] Fortran MEs ( 1 ) : 0.4014s for 8192 events => throughput is 2.04E+04 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** @@ -79,8 +79,8 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/ou [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195085723454797E-002] fbridge_mode=0 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.7201s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3188s + [COUNTERS] PROGRAM TOTAL : 0.7169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3157s [COUNTERS] Fortran MEs ( 1 ) : 0.4013s for 8192 events => throughput is 2.04E+04 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/o [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311088744870252E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 6.1221s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7147s - [COUNTERS] Fortran MEs ( 1 ) : 4.4075s for 90112 events => throughput is 2.04E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.1273s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7138s + [COUNTERS] Fortran MEs ( 1 ) : 4.4135s for 90112 events => throughput is 2.04E+04 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,9 +127,9 @@ Executing ' ./build.none_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195086564193622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 1.0578s + [COUNTERS] PROGRAM TOTAL : 1.0577s [COUNTERS] Fortran Overhead ( 0 ) : 0.6837s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3740s for 8192 events => throughput is 2.19E+04 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3741s for 8192 events => throughput is 2.19E+04 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -159,9 +159,9 @@ Executing ' ./build.none_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311089500389039E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 6.2233s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1068s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1165s for 90112 events => throughput is 2.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.2316s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1111s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1205s for 90112 events => throughput is 2.19E+04 events/s *** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -174,12 +174,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.266727e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.266756e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.263700e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.266881e+04 ) sec^-1 *** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -201,9 +201,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195086605026057E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.6946s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5054s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1893s for 8192 events => throughput is 4.33E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6944s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5052s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1892s for 8192 events => throughput is 4.33E+04 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -233,9 +233,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311089502614300E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.0127s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9306s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.0822s for 90112 events => throughput is 4.33E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.0050s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9247s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.0803s for 90112 events => throughput is 4.33E+04 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -248,12 +248,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.435644e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.443188e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.443341e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.445642e+04 ) sec^-1 *** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -275,9 +275,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195086387233925E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.5177s + [COUNTERS] PROGRAM TOTAL : 0.5175s [COUNTERS] Fortran Overhead ( 0 ) : 0.4180s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0997s for 8192 events => throughput is 8.21E+04 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0995s for 8192 events => throughput is 8.23E+04 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -307,9 +307,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311089374572210E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.9358s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8376s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0982s for 90112 events => throughput is 8.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.9371s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8408s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0963s for 90112 events => throughput is 8.22E+04 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -322,12 +322,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.422097e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.426773e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.433095e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.443955e+04 ) sec^-1 *** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -349,9 +349,9 @@ Executing ' ./build.512y_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195086387233925E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.4952s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4069s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0883s for 8192 events => throughput is 9.28E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4944s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4062s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0882s for 8192 events => throughput is 9.29E+04 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -381,9 +381,9 @@ Executing ' ./build.512y_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311089374572210E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.8001s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8281s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9721s for 90112 events => throughput is 9.27E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7965s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8259s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9706s for 90112 events => throughput is 9.28E+04 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -396,12 +396,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.583753e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.603193e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.598343e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.603446e+04 ) sec^-1 *** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -423,9 +423,9 @@ Executing ' ./build.512z_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195086558415522E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.5651s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4430s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1221s for 8192 events => throughput is 6.71E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5642s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4420s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1222s for 8192 events => throughput is 6.70E+04 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -455,9 +455,9 @@ Executing ' ./build.512z_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311089468185382E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.2130s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8686s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3445s for 90112 events => throughput is 6.70E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.2054s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8612s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3442s for 90112 events => throughput is 6.70E+04 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -470,12 +470,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.786063e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.793690e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.805540e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.780845e+04 ) sec^-1 *** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -497,9 +497,9 @@ Executing ' ./build.none_m_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7195085904007147E-002] fbridge_mode=1 [UNWEIGHT] Wrote 40 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 0.7707s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7689s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.65E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7716s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7698s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.64E+06 events/s *** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -529,9 +529,9 @@ Executing ' ./build.none_m_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1311088735359069E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2020s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1821s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0199s for 90112 events => throughput is 4.54E+06 events/s + [COUNTERS] PROGRAM TOTAL : 2.2037s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1839s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 90112 events => throughput is 4.54E+06 events/s *** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -544,41 +544,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.890224e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.994291e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.520331e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.485259e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.007426e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.000848e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.212442e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.209756e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.993087e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.970592e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.216130e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.218744e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.996600e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.935530e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.734898e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.728124e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index f1e3556691..0a3744e4e9 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,37 +1,37 @@ Working directory (build): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 + +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_11:26:06 +DATE: 2022-12-18_02:35:48 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -55,9 +55,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/o [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 5.1894s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3697s - [COUNTERS] Fortran MEs ( 1 ) : 4.8197s for 8192 events => throughput is 1.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.1156s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3008s + [COUNTERS] Fortran MEs ( 1 ) : 4.8148s for 8192 events => throughput is 1.70E+03 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/o [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277245934316629E-004] fbridge_mode=0 [UNWEIGHT] Wrote 49 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 5.2698s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4577s - [COUNTERS] Fortran MEs ( 1 ) : 4.8122s for 8192 events => throughput is 1.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.2325s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3922s + [COUNTERS] Fortran MEs ( 1 ) : 4.8403s for 8192 events => throughput is 1.69E+03 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803949688410202E-004] fbridge_mode=0 [UNWEIGHT] Wrote 204 events (found 1633 events) - [COUNTERS] PROGRAM TOTAL : 55.3291s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3880s - [COUNTERS] Fortran MEs ( 1 ) : 52.9411s for 90112 events => throughput is 1.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 55.2887s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3835s + [COUNTERS] Fortran MEs ( 1 ) : 52.9053s for 90112 events => throughput is 1.70E+03 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,429 +127,458 @@ Executing ' ./build.none_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277245934316645E-004] fbridge_mode=1 [UNWEIGHT] Wrote 49 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 10.0600s - [COUNTERS] Fortran Overhead ( 0 ) : 5.1504s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.9095s for 8192 events => throughput is 1.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 10.0233s + [COUNTERS] Fortran Overhead ( 0 ) : 5.1534s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.8699s for 8192 events => throughput is 1.68E+03 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277245934316645E-004) differ by less than 2E-14 (4.440892098500626e-16) *** (2-none) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** -3,4c3,4 -< 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.13289043826E+04 0.13289043826E+04 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.81223316322E+02 0.81223316322E+02 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.13289043826E+04 0.13289043826E+04 0.00000000000E+00 0. -1. -> 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.81223316322E+02 0.81223316322E+02 0.00000000000E+00 0. -1. -6,8c6,8 -< -6 1 1 2 0 504 0.39403209480E+02 -0.10079469096E+02 0.28578226692E+03 0.33653337532E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.19269775075E+03 0.33434234480E+02 0.26595208036E+03 0.33012237159E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.20498361398E+02 0.29398294961E+02 0.12436578484E+03 0.12942677855E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 505 0.39403209480E+02 -0.10079469096E+02 0.28578226692E+03 0.33653337532E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 -0.19269775075E+03 0.33434234480E+02 0.26595208036E+03 0.33012237159E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 502 0.20498361398E+02 0.29398294961E+02 0.12436578484E+03 0.12942677855E+03 0.00000000000E+00 0. -1. -54,56c54,56 -< -6 1 1 2 0 505 0.12539878316E+03 0.25084537686E+03 0.17266798312E+03 0.37201006747E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 503 0.91559552940E+02 -0.56451043237E+03 0.74367925168E+03 0.93814391719E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.59178509296E+01 -0.20888672560E+02 0.16637826240E+02 0.27352785287E+02 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.12539878316E+03 0.25084537686E+03 0.17266798312E+03 0.37201006747E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 505 0.91559552940E+02 -0.56451043237E+03 0.74367925168E+03 0.93814391719E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 -0.59178509296E+01 -0.20888672560E+02 0.16637826240E+02 0.27352785287E+02 0.00000000000E+00 0. 1. -86,88c86,88 -< -6 1 1 2 0 504 0.21874707030E+03 0.13546343073E+03 0.25532276652E+03 0.40164578540E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 502 -0.71752123105E+03 -0.62228037306E+02 0.22299504958E+03 0.75394684015E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.11370550817E+03 0.19005264160E+02 -0.20103500671E+03 0.23174385985E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.21874707030E+03 0.13546343073E+03 0.25532276652E+03 0.40164578540E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 501 -0.71752123105E+03 -0.62228037306E+02 0.22299504958E+03 0.75394684015E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.11370550817E+03 0.19005264160E+02 -0.20103500671E+03 0.23174385985E+03 0.00000000000E+00 0. -1. -99,100c99,100 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.80173305346E+03 0.80173305346E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.62467339451E+03 0.62467339451E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.80173305346E+03 0.80173305346E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.62467339451E+03 0.62467339451E+03 0.00000000000E+00 0. -1. -103c103 -< 21 1 1 2 504 502 -0.47480153194E+03 -0.35244666584E+03 0.73446475215E+02 0.59586032903E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 503 -0.47480153194E+03 -0.35244666584E+03 0.73446475215E+02 0.59586032903E+03 0.00000000000E+00 0. -1. -115,116c115,116 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.40182492101E+03 0.40182492101E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.31260679922E+03 0.31260679922E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.40182492101E+03 0.40182492101E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.31260679922E+03 0.31260679922E+03 0.00000000000E+00 0. -1. -120c120 -< 21 1 1 2 505 503 -0.15497496606E+02 0.49927883956E+02 0.10702696535E+02 0.53362099945E+02 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 505 502 -0.15497496606E+02 0.49927883956E+02 0.10702696535E+02 0.53362099945E+02 0.00000000000E+00 0. 1. -131c131 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.96278959232E+03 0.96278959232E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.96278959232E+03 0.96278959232E+03 0.00000000000E+00 0. -1. -134,135c134,135 -< -6 1 1 2 0 503 0.21857372889E+03 0.15145241426E+03 0.45066058276E+02 0.32042512130E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 505 -0.85780113425E+03 0.14260763483E+03 -0.32664683767E+03 0.92890143717E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.21857372889E+03 0.15145241426E+03 0.45066058276E+02 0.32042512130E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 503 -0.85780113425E+03 0.14260763483E+03 -0.32664683767E+03 0.92890143717E+03 0.00000000000E+00 0. -1. -150,152c150,152 -< -6 1 1 2 0 505 0.13931913890E+02 0.17750922096E+03 0.35180277953E+03 0.43057846838E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 -0.68773880518E+02 -0.15629672638E+03 0.29558261724E+03 0.34136138759E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 -0.31808842154E+02 -0.58814069105E+02 0.18302728040E+03 0.19485862191E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.13931913890E+02 0.17750922096E+03 0.35180277953E+03 0.43057846838E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.68773880518E+02 -0.15629672638E+03 0.29558261724E+03 0.34136138759E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 502 -0.31808842154E+02 -0.58814069105E+02 0.18302728040E+03 0.19485862191E+03 0.00000000000E+00 0. 1. -166,168c166,168 -< -6 1 1 2 0 505 0.20706200236E+03 -0.23262072670E+03 -0.78433215568E+03 0.86144820259E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 -0.11364979994E+03 0.36077625749E+03 -0.17519459630E+03 0.41685600819E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 -0.24972583892E+03 0.76450798261E+02 -0.37831215827E+03 0.45970404422E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.20706200236E+03 -0.23262072670E+03 -0.78433215568E+03 0.86144820259E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.11364979994E+03 0.36077625749E+03 -0.17519459630E+03 0.41685600819E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 502 -0.24972583892E+03 0.76450798261E+02 -0.37831215827E+03 0.45970404422E+03 0.00000000000E+00 0. 1. -179,180c179,180 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.98635404392E+02 0.98635404392E+02 0.00000000000E+00 0. -1. -< 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.19738864971E+04 0.19738864971E+04 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.98635404392E+02 0.98635404392E+02 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.19738864971E+04 0.19738864971E+04 0.00000000000E+00 0. 1. -183c183 -< 21 1 1 2 504 502 0.40854483793E+02 -0.12391382476E+03 -0.56339970115E+03 0.57831042535E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 504 503 0.40854483793E+02 -0.12391382476E+03 -0.56339970115E+03 0.57831042535E+03 0.00000000000E+00 0. 1. -195,196c195,196 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.48692914599E+03 0.48692914599E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.42777476019E+03 0.42777476019E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.48692914599E+03 0.48692914599E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.42777476019E+03 0.42777476019E+03 0.00000000000E+00 0. 1. -199,200c199,200 -< 21 1 1 2 504 502 -0.11483456115E+03 -0.18451124114E+03 0.28411941872E+03 0.35770828706E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 0.40040892554E+02 -0.40819720748E+02 -0.34960525373E+02 0.67020601406E+02 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 504 501 -0.11483456115E+03 -0.18451124114E+03 0.28411941872E+03 0.35770828706E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 503 0.40040892554E+02 -0.40819720748E+02 -0.34960525373E+02 0.67020601406E+02 0.00000000000E+00 0. 1. -211,212c211,212 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.59917676920E+03 0.59917676920E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.17083712269E+04 0.17083712269E+04 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.59917676920E+03 0.59917676920E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.17083712269E+04 0.17083712269E+04 0.00000000000E+00 0. -1. -214,216c214,216 -< -6 1 1 2 0 504 -0.10657108230E+02 -0.33922771988E+03 -0.46992979638E+03 0.60493969404E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 0.47981075109E+03 0.61702976492E+03 -0.40969291350E+02 0.78270209563E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.35726018887E+03 -0.31391496917E+03 -0.32619534386E+03 0.57669823372E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 503 -0.10657108230E+02 -0.33922771988E+03 -0.46992979638E+03 0.60493969404E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 0.47981075109E+03 0.61702976492E+03 -0.40969291350E+02 0.78270209563E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.35726018887E+03 -0.31391496917E+03 -0.32619534386E+03 0.57669823372E+03 0.00000000000E+00 0. -1. -230,232c230,232 -< -6 1 1 2 0 505 0.22482853132E+03 -0.61820999175E+02 -0.20240075959E+03 0.35392763656E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 -0.18791203660E+03 0.63561496588E+02 0.15365486866E+03 0.25091993945E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.11487633146E+03 -0.13416331310E+03 0.14095971748E+03 0.22597789287E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.22482853132E+03 -0.61820999175E+02 -0.20240075959E+03 0.35392763656E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.18791203660E+03 0.63561496588E+02 0.15365486866E+03 0.25091993945E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 502 -0.11487633146E+03 -0.13416331310E+03 0.14095971748E+03 0.22597789287E+03 0.00000000000E+00 0. -1. -244c244 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.26737024844E+03 0.26737024844E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.26737024844E+03 0.26737024844E+03 0.00000000000E+00 0. -1. -246,247c246,247 -< -6 1 1 2 0 505 0.28177467352E+02 0.14804686680E+03 0.30235403396E+02 0.23141958442E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 501 -0.21172919086E+02 -0.84430272724E+02 0.46880125199E+01 0.87170757231E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.28177467352E+02 0.14804686680E+03 0.30235403396E+02 0.23141958442E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 505 -0.21172919086E+02 -0.84430272724E+02 0.46880125199E+01 0.87170757231E+02 0.00000000000E+00 0. -1. -291c291 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.61663795165E+03 0.61663795165E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.61663795165E+03 0.61663795165E+03 0.00000000000E+00 0. 1. -294,295c294,295 -< -6 1 1 2 0 503 -0.24303968755E+02 -0.97928126209E+02 0.15074155660E+03 0.25066435264E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 0.12620887057E+03 -0.12706408772E+03 0.46477623488E+02 0.18502467642E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 -0.24303968755E+02 -0.97928126209E+02 0.15074155660E+03 0.25066435264E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 503 0.12620887057E+03 -0.12706408772E+03 0.46477623488E+02 0.18502467642E+03 0.00000000000E+00 0. 1. -324c324 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.22170284178E+03 0.22170284178E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.22170284178E+03 0.22170284178E+03 0.00000000000E+00 0. -1. -327,328c327,328 -< 21 1 1 2 504 505 -0.36346166898E+02 -0.73060382077E+02 -0.96989669211E+02 0.12675117045E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.18381769511E+02 0.14852420334E+03 0.55525657110E+02 0.15962589711E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 501 -0.36346166898E+02 -0.73060382077E+02 -0.96989669211E+02 0.12675117045E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 0.18381769511E+02 0.14852420334E+03 0.55525657110E+02 0.15962589711E+03 0.00000000000E+00 0. -1. -355,356c355,356 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.74721788721E+03 0.74721788721E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.68582026055E+03 0.68582026055E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.74721788721E+03 0.74721788721E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.68582026055E+03 0.68582026055E+03 0.00000000000E+00 0. -1. -360c360 -< 21 1 1 2 505 503 0.36544140559E+02 0.20612509127E+02 0.23512027531E+02 0.48095375873E+02 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 505 502 0.36544140559E+02 0.20612509127E+02 0.23512027531E+02 0.48095375873E+02 0.00000000000E+00 0. 1. -371,372c371,372 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.64237774275E+02 0.64237774275E+02 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.12533760551E+04 0.12533760551E+04 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.64237774275E+02 0.64237774275E+02 0.00000000000E+00 0. 1. -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.12533760551E+04 0.12533760551E+04 0.00000000000E+00 0. 1. -374,375c374,375 -< -6 1 1 2 0 503 0.63988507197E+02 -0.48336095319E+02 -0.43608803810E+03 0.47595449797E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 -0.12221016945E+03 0.51559952731E+02 -0.13503359931E+03 0.18928240062E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.63988507197E+02 -0.48336095319E+02 -0.43608803810E+03 0.47595449797E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 502 -0.12221016945E+03 0.51559952731E+02 -0.13503359931E+03 0.18928240062E+03 0.00000000000E+00 0. 1. -387,388c387,388 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.16316354202E+04 0.16316354202E+04 0.00000000000E+00 0. 1. -< 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.33148240180E+03 0.33148240180E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.16316354202E+04 0.16316354202E+04 0.00000000000E+00 0. 1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.33148240180E+03 0.33148240180E+03 0.00000000000E+00 0. 1. -391,392c391,392 -< 21 1 1 2 504 502 -0.59343447897E+03 -0.34529561999E+02 0.61306875044E+03 0.85393797448E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 0.18686731050E+03 0.20648812836E+03 0.35627777311E+03 0.45220635831E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 504 501 -0.59343447897E+03 -0.34529561999E+02 0.61306875044E+03 0.85393797448E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 503 0.18686731050E+03 0.20648812836E+03 0.35627777311E+03 0.45220635831E+03 0.00000000000E+00 0. 1. -406,408c406,408 -< -6 1 1 2 0 505 -0.21464427393E+03 -0.12110518135E+03 -0.56477133229E+02 0.30636138114E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 0.33290316793E+03 0.37149729646E+03 -0.40518106541E+03 0.64265578365E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 0.16165043624E+03 -0.16767027538E+03 -0.16990350120E+03 0.28829045164E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 -0.21464427393E+03 -0.12110518135E+03 -0.56477133229E+02 0.30636138114E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 0.33290316793E+03 0.37149729646E+03 -0.40518106541E+03 0.64265578365E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 503 0.16165043624E+03 -0.16767027538E+03 -0.16990350120E+03 0.28829045164E+03 0.00000000000E+00 0. 1. -420c420 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.89421845094E+03 0.89421845094E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.89421845094E+03 0.89421845094E+03 0.00000000000E+00 0. 1. -422,423c422,423 -< -6 1 1 2 0 502 -0.54431632830E+02 -0.23085593672E+02 0.29418937980E+02 0.18517618961E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 -0.83256024246E+02 0.77736473587E+02 -0.45416156269E+03 0.46822777569E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 -0.54431632830E+02 -0.23085593672E+02 0.29418937980E+02 0.18517618961E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 502 -0.83256024246E+02 0.77736473587E+02 -0.45416156269E+03 0.46822777569E+03 0.00000000000E+00 0. 1. -438,440c438,440 -< -6 1 1 2 0 504 0.26429476746E+02 0.13107250238E+03 0.83932374694E+03 0.86733607690E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 0.28660701548E+02 -0.67892405610E+02 0.80145852177E+02 0.10887686703E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.61017958141E+02 -0.41225933505E+02 0.21222438665E+02 0.76636549452E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.26429476746E+02 0.13107250238E+03 0.83932374694E+03 0.86733607690E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 0.28660701548E+02 -0.67892405610E+02 0.80145852177E+02 0.10887686703E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.61017958141E+02 -0.41225933505E+02 0.21222438665E+02 0.76636549452E+02 0.00000000000E+00 0. -1. -451,452c451,452 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.60949526212E+03 0.60949526212E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.50457739686E+03 0.50457739686E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.60949526212E+03 0.60949526212E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.50457739686E+03 0.50457739686E+03 0.00000000000E+00 0. -1. -456c456 -< 21 1 1 2 505 503 0.86030814077E+01 -0.47120489445E+02 0.51446776447E+02 0.70293131542E+02 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 505 502 0.86030814077E+01 -0.47120489445E+02 0.51446776447E+02 0.70293131542E+02 0.00000000000E+00 0. -1. -486,488c486,488 -< -6 1 1 2 0 505 0.30886141756E+02 -0.16430824340E+03 -0.84066899690E+03 0.87441667125E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 0.13118182004E+03 0.10973355069E+03 -0.39134396895E+03 0.42708339243E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 0.19024439872E+02 -0.81763658921E+02 -0.21758923733E+03 0.23322157154E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.30886141756E+02 -0.16430824340E+03 -0.84066899690E+03 0.87441667125E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 0.13118182004E+03 0.10973355069E+03 -0.39134396895E+03 0.42708339243E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 0.19024439872E+02 -0.81763658921E+02 -0.21758923733E+03 0.23322157154E+03 0.00000000000E+00 0. -1. -499,500c499,500 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.26577069905E+03 0.26577069905E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.42974946867E+03 0.42974946867E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 501 502 0.00000000000E+00 0.00000000000E+00 0.26577069905E+03 0.26577069905E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.42974946867E+03 0.42974946867E+03 0.00000000000E+00 0. -1. -502,504c502,504 -< -6 1 1 2 0 505 -0.13300555345E+03 -0.11630906445E+03 -0.37665370341E+01 0.24730843601E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 502 -0.43728188693E+02 0.68187246486E+02 -0.38878884523E+01 0.81097291856E+02 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 0.14557470704E+03 -0.12755766678E+02 -0.70110299278E+02 0.16208071748E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 -0.13300555345E+03 -0.11630906445E+03 -0.37665370341E+01 0.24730843601E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 505 -0.43728188693E+02 0.68187246486E+02 -0.38878884523E+01 0.81097291856E+02 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 0.14557470704E+03 -0.12755766678E+02 -0.70110299278E+02 0.16208071748E+03 0.00000000000E+00 0. -1. -515,516c515,516 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.15265142404E+03 0.15265142404E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.67951909106E+03 0.67951909106E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.15265142404E+03 0.15265142404E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.67951909106E+03 0.67951909106E+03 0.00000000000E+00 0. -1. -519,520c519,520 -< 21 1 1 2 504 502 -0.64668073700E+02 -0.22663476572E+02 -0.15548229867E+02 0.70266210787E+02 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.86157343699E+02 0.54861376029E+02 -0.85547240486E+02 0.13323358739E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 501 -0.64668073700E+02 -0.22663476572E+02 -0.15548229867E+02 0.70266210787E+02 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 -0.86157343699E+02 0.54861376029E+02 -0.85547240486E+02 0.13323358739E+03 0.00000000000E+00 0. -1. -531,532c531,532 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.94803553158E+03 0.94803553158E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.30665065164E+03 0.30665065164E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.94803553158E+03 0.94803553158E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.30665065164E+03 0.30665065164E+03 0.00000000000E+00 0. 1. -534c534 -< -6 1 1 2 0 505 0.10212537215E+03 0.29992209329E+03 0.29620049631E+03 0.46695458848E+03 0.17300000000E+03 0. 1. ---- -> -6 1 1 2 0 503 0.10212537215E+03 0.29992209329E+03 0.29620049631E+03 0.46695458848E+03 0.17300000000E+03 0. 1. -536c536 -< 21 1 1 2 505 502 0.11283259483E+03 0.14157613141E+02 0.42722501058E+02 0.12147775337E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 505 504 0.11283259483E+03 0.14157613141E+02 0.42722501058E+02 0.12147775337E+03 0.00000000000E+00 0. 1. -547,548c547,548 -< 21 -1 0 0 501 502 0.00000000000E+00 0.00000000000E+00 0.47606275146E+03 0.47606275146E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.12049860080E+04 0.12049860080E+04 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.47606275146E+03 0.47606275146E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.12049860080E+04 0.12049860080E+04 0.00000000000E+00 0. -1. -550,552c550,552 -< -6 1 1 2 0 505 0.97882649898E+02 0.16297450200E+03 -0.19720597644E+02 0.25779760166E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.28538509898E+03 -0.52873443017E+03 -0.65555368388E+03 0.88924427736E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 0.12295736208E+03 0.67300180088E+02 0.76504849312E+02 0.15968975890E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.97882649898E+02 0.16297450200E+03 -0.19720597644E+02 0.25779760166E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.28538509898E+03 -0.52873443017E+03 -0.65555368388E+03 0.88924427736E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 502 0.12295736208E+03 0.67300180088E+02 0.76504849312E+02 0.15968975890E+03 0.00000000000E+00 0. -1. -566,568c566,568 -< -6 1 1 2 0 504 -0.87875296856E+01 0.19184114946E+03 -0.43964080569E+02 0.26218712341E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 503 -0.14494537025E+03 -0.38971629371E+03 -0.68618045808E+03 0.80232884218E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.48355400491E+01 -0.94154538055E+02 0.12397164811E+02 0.95090216002E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 503 -0.87875296856E+01 0.19184114946E+03 -0.43964080569E+02 0.26218712341E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 501 -0.14494537025E+03 -0.38971629371E+03 -0.68618045808E+03 0.80232884218E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 0.48355400491E+01 -0.94154538055E+02 0.12397164811E+02 0.95090216002E+02 0.00000000000E+00 0. -1. -579,580c579,580 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.77469255466E+03 0.77469255466E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.10813002608E+04 0.10813002608E+04 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.77469255466E+03 0.77469255466E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.10813002608E+04 0.10813002608E+04 0.00000000000E+00 0. -1. -583c583 -< 21 1 1 2 504 502 0.55152957698E+03 -0.65201833850E+03 -0.16583908850E+03 0.86995137295E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 503 0.55152957698E+03 -0.65201833850E+03 -0.16583908850E+03 0.86995137295E+03 0.00000000000E+00 0. -1. -598,600c598,600 -< -6 1 1 2 0 505 0.15718148546E+03 -0.24851431682E+02 0.35856848216E+02 0.23777789341E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 -0.23107467438E+03 -0.16406600020E+03 0.59477379497E+03 0.65883914937E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.88374140735E+02 0.12563521121E+03 0.16770635529E+03 0.22741947289E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.15718148546E+03 -0.24851431682E+02 0.35856848216E+02 0.23777789341E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.23107467438E+03 -0.16406600020E+03 0.59477379497E+03 0.65883914937E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 502 -0.88374140735E+02 0.12563521121E+03 0.16770635529E+03 0.22741947289E+03 0.00000000000E+00 0. 1. -614,616c614,616 -< -6 1 1 2 0 505 0.11116666631E+03 -0.51398409506E+02 0.25242989690E+03 0.32962050459E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.15250226778E+03 -0.11176768196E+02 -0.80605642490E+02 0.17285581109E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.53378502621E+02 -0.36915136015E+02 0.11915764877E+02 0.65984674445E+02 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.11116666631E+03 -0.51398409506E+02 0.25242989690E+03 0.32962050459E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.15250226778E+03 -0.11176768196E+02 -0.80605642490E+02 0.17285581109E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 -0.53378502621E+02 -0.36915136015E+02 0.11915764877E+02 0.65984674445E+02 0.00000000000E+00 0. 1. -628c628 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.30704401996E+03 0.30704401996E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.30704401996E+03 0.30704401996E+03 0.00000000000E+00 0. -1. -631,632c631,632 -< 21 1 1 2 504 505 0.17407121683E+02 -0.45886173803E+03 0.98272161589E+02 0.46958973611E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.11454468903E+03 0.26203389730E+03 0.11151729820E+03 0.30695008865E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 504 501 0.17407121683E+02 -0.45886173803E+03 0.98272161589E+02 0.46958973611E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.11454468903E+03 0.26203389730E+03 0.11151729820E+03 0.30695008865E+03 0.00000000000E+00 0. 1. -659,660c659,660 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.19878691327E+04 0.19878691327E+04 0.00000000000E+00 0. -1. -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.99887434647E+03 0.99887434647E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.19878691327E+04 0.19878691327E+04 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.99887434647E+03 0.99887434647E+03 0.00000000000E+00 0. 1. -662,663c662,663 -< -6 1 1 2 0 502 -0.70772535533E+02 0.36186357491E+03 0.79860421170E+03 0.89646622110E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 505 -0.17681242702E+03 -0.56414514974E+03 -0.77225269752E+03 0.97257216347E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 -0.70772535533E+02 0.36186357491E+03 0.79860421170E+03 0.89646622110E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 503 -0.17681242702E+03 -0.56414514974E+03 -0.77225269752E+03 0.97257216347E+03 0.00000000000E+00 0. 1. -675,676c675,676 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.79907847242E+03 0.79907847242E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.11577536833E+03 0.11577536833E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.79907847242E+03 0.79907847242E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.11577536833E+03 0.11577536833E+03 0.00000000000E+00 0. -1. -678,680c678,680 -< -6 1 1 2 0 503 0.42321248541E+01 -0.89969962167E+02 0.11768892797E+03 0.22779857054E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 0.58019418893E+02 -0.42703378784E+01 0.64376315681E+02 0.86768650877E+02 0.00000000000E+00 0. 1. -< 21 1 1 2 505 501 0.58800425760E+02 0.32754574443E+02 0.18481203843E+02 0.69799048074E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.42321248541E+01 -0.89969962167E+02 0.11768892797E+03 0.22779857054E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 501 0.58019418893E+02 -0.42703378784E+01 0.64376315681E+02 0.86768650877E+02 0.00000000000E+00 0. 1. -> 21 1 1 2 505 504 0.58800425760E+02 0.32754574443E+02 0.18481203843E+02 0.69799048074E+02 0.00000000000E+00 0. -1. -691,692c691,692 -< 21 -1 0 0 501 502 0.00000000000E+00 0.00000000000E+00 0.87478592262E+03 0.87478592262E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.16023103971E+03 0.16023103971E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.87478592262E+03 0.87478592262E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.16023103971E+03 0.16023103971E+03 0.00000000000E+00 0. 1. -694,696c694,696 -< -6 1 1 2 0 505 0.51094356582E+02 -0.40347360213E+02 0.29339796138E+03 0.34677068287E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 503 0.13529064158E+03 0.15714107453E+03 0.10977636523E+03 0.23462251675E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 0.18275955805E+02 0.16416297636E+02 0.52338835483E+02 0.57817463525E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.51094356582E+02 -0.40347360213E+02 0.29339796138E+03 0.34677068287E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 505 0.13529064158E+03 0.15714107453E+03 0.10977636523E+03 0.23462251675E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 502 0.18275955805E+02 0.16416297636E+02 0.52338835483E+02 0.57817463525E+02 0.00000000000E+00 0. -1. -707,708c707,708 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.86193635545E+03 0.86193635545E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.21781596149E+03 0.21781596149E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.86193635545E+03 0.86193635545E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.21781596149E+03 0.21781596149E+03 0.00000000000E+00 0. -1. -712c712 -< 21 1 1 2 505 502 -0.32336112435E+02 -0.88941145944E+02 0.17880896684E+03 0.20230867068E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 505 503 -0.32336112435E+02 -0.88941145944E+02 0.17880896684E+03 0.20230867068E+03 0.00000000000E+00 0. 1. -723,724c723,724 -< 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.17158840285E+04 0.17158840285E+04 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.19926995597E+03 0.19926995597E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.17158840285E+04 0.17158840285E+04 0.00000000000E+00 0. -1. -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.19926995597E+03 0.19926995597E+03 0.00000000000E+00 0. -1. -726,728c726,728 -< -6 1 1 2 0 504 0.13494095331E+03 0.12732567530E+03 0.41986177104E+03 0.49054438662E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.36929525166E+03 0.55262020840E+02 0.20540407587E+03 0.42617333121E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.68446901696E+02 -0.16199453235E+03 0.19946776735E+03 0.26592216356E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.13494095331E+03 0.12732567530E+03 0.41986177104E+03 0.49054438662E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 -0.36929525166E+03 0.55262020840E+02 0.20540407587E+03 0.42617333121E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 0.68446901696E+02 -0.16199453235E+03 0.19946776735E+03 0.26592216356E+03 0.00000000000E+00 0. -1. -740c740 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.20717130236E+04 0.20717130236E+04 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.20717130236E+04 0.20717130236E+04 0.00000000000E+00 0. -1. -743,744c743,744 -< 21 1 1 2 504 505 -0.51917617014E+03 -0.41711433140E+03 -0.10951499650E+04 0.12817494712E+04 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.38623106687E+02 -0.16179945446E+03 -0.19265782928E+03 0.25453456939E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 501 -0.51917617014E+03 -0.41711433140E+03 -0.10951499650E+04 0.12817494712E+04 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.38623106687E+02 -0.16179945446E+03 -0.19265782928E+03 0.25453456939E+03 0.00000000000E+00 0. -1. -758,760c758,760 -< -6 1 1 2 0 504 0.99857720003E+02 -0.10433176492E+01 -0.72706788852E+03 0.75400886419E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.44329997817E+03 0.91326222484E+02 -0.65904825990E+03 0.79949981766E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 501 0.38402699303E+02 -0.12844982940E+03 -0.14851741956E+03 0.20007885920E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 503 0.99857720003E+02 -0.10433176492E+01 -0.72706788852E+03 0.75400886419E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 -0.44329997817E+03 0.91326222484E+02 -0.65904825990E+03 0.79949981766E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 504 0.38402699303E+02 -0.12844982940E+03 -0.14851741956E+03 0.20007885920E+03 0.00000000000E+00 0. 1. -774,776c774,776 -< -6 1 1 2 0 504 0.16855730097E+03 0.29610135927E+00 -0.32785897876E+03 0.40722495177E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 502 -0.42203367704E+02 -0.12545475516E+03 -0.26604125354E+02 0.13501036747E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.10399608402E+03 -0.15451437807E+02 -0.65273222246E+02 0.12375187256E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.16855730097E+03 0.29610135927E+00 -0.32785897876E+03 0.40722495177E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 501 -0.42203367704E+02 -0.12545475516E+03 -0.26604125354E+02 0.13501036747E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 0.10399608402E+03 -0.15451437807E+02 -0.65273222246E+02 0.12375187256E+03 0.00000000000E+00 0. -1. -ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803949688410199E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 60.6870s + [COUNTERS] Fortran Overhead ( 0 ) : 7.0983s + [COUNTERS] CudaCpp MEs ( 2 ) : 53.5888s for 90112 events => throughput is 1.68E+03 events/s + +*** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803949688410199E-004) differ by less than 2E-14 (2.220446049250313e-16) + +*** (2-none) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.734946e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.721402e+03 ) sec^-1 + +*** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277245934316656E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 5.2661s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7965s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4696s for 8192 events => throughput is 3.32E+03 events/s + +*** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277245934316656E-004) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-sse4) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803949688410199E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 31.9207s + [COUNTERS] Fortran Overhead ( 0 ) : 4.7787s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.1420s for 90112 events => throughput is 3.32E+03 events/s + +*** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803949688410199E-004) differ by less than 2E-14 (2.220446049250313e-16) + +*** (2-sse4) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.411750e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.413295e+03 ) sec^-1 + +*** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277245934316656E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 2.8438s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6053s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2385s for 8192 events => throughput is 6.61E+03 events/s + +*** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277245934316656E-004) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-avx2) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803949688410193E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 17.1622s + [COUNTERS] Fortran Overhead ( 0 ) : 3.5778s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.5844s for 90112 events => throughput is 6.63E+03 events/s + +*** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803949688410193E-004) differ by less than 2E-14 (5.551115123125783e-16) + +*** (2-avx2) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.803132e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.805300e+03 ) sec^-1 + +*** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277245934316656E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 2.5386s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4482s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0904s for 8192 events => throughput is 7.51E+03 events/s + +*** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277245934316656E-004) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-512y) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803949688410193E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 15.4311s + [COUNTERS] Fortran Overhead ( 0 ) : 3.4269s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.0042s for 90112 events => throughput is 7.51E+03 events/s + +*** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803949688410193E-004) differ by less than 2E-14 (5.551115123125783e-16) + +*** (2-512y) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.510588e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.778403e+03 ) sec^-1 + +*** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277245934316656E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 2.9467s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6613s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2855s for 8192 events => throughput is 6.37E+03 events/s + +*** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277245934316656E-004) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-512z) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803949688410193E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 17.7944s + [COUNTERS] Fortran Overhead ( 0 ) : 3.6477s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.1467s for 90112 events => throughput is 6.37E+03 events/s + +*** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803949688410193E-004) differ by less than 2E-14 (5.551115123125783e-16) + +*** (2-512z) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.463641e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.469413e+03 ) sec^-1 + +*** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277245934316656E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 0.9517s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9181s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0336s for 8192 events => throughput is 2.44E+05 events/s + +*** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277245934316656E-004) differ by less than 2E-14 (6.661338147750939e-16) + +*** (3) Compare GMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE GMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803949688410188E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 3.2255s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8781s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3474s for 90112 events => throughput is 2.59E+05 events/s + +*** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803949688410188E-004) differ by less than 2E-14 (8.881784197001252e-16) + +*** (3) Compare GMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.252028e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.493301e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.110058e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.159093e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.125395e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.156347e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.120834e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.455337e+05 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index a06bd391a4..c5a04c551d 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -5,33 +5,33 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_11:27:25 +DATE: 2022-12-18_02:40:39 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -55,9 +55,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/o [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 5.0967s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2839s - [COUNTERS] Fortran MEs ( 1 ) : 4.8128s for 8192 events => throughput is 1.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0971s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2842s + [COUNTERS] Fortran MEs ( 1 ) : 4.8129s for 8192 events => throughput is 1.70E+03 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/o [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277245934316629E-004] fbridge_mode=0 [UNWEIGHT] Wrote 49 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 5.2171s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3999s - [COUNTERS] Fortran MEs ( 1 ) : 4.8172s for 8192 events => throughput is 1.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.2065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3942s + [COUNTERS] Fortran MEs ( 1 ) : 4.8123s for 8192 events => throughput is 1.70E+03 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803949688410202E-004] fbridge_mode=0 [UNWEIGHT] Wrote 204 events (found 1633 events) - [COUNTERS] PROGRAM TOTAL : 55.3592s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3868s - [COUNTERS] Fortran MEs ( 1 ) : 52.9724s for 90112 events => throughput is 1.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 55.3166s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3841s + [COUNTERS] Fortran MEs ( 1 ) : 52.9325s for 90112 events => throughput is 1.70E+03 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,429 +127,458 @@ Executing ' ./build.none_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277365128548311E-004] fbridge_mode=1 [UNWEIGHT] Wrote 49 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 9.6487s - [COUNTERS] Fortran Overhead ( 0 ) : 4.9579s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.6908s for 8192 events => throughput is 1.75E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.6474s + [COUNTERS] Fortran Overhead ( 0 ) : 4.9594s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.6880s for 8192 events => throughput is 1.75E+03 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277365128548311E-004) differ by less than 2E-4 (3.285647204309683e-06) *** (2-none) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** -3,4c3,4 -< 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.13289043826E+04 0.13289043826E+04 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.81223316322E+02 0.81223316322E+02 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.13289043826E+04 0.13289043826E+04 0.00000000000E+00 0. -1. -> 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.81223316322E+02 0.81223316322E+02 0.00000000000E+00 0. -1. -6,8c6,8 -< -6 1 1 2 0 504 0.39403209480E+02 -0.10079469096E+02 0.28578226692E+03 0.33653337532E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.19269775075E+03 0.33434234480E+02 0.26595208036E+03 0.33012237159E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.20498361398E+02 0.29398294961E+02 0.12436578484E+03 0.12942677855E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 505 0.39403209480E+02 -0.10079469096E+02 0.28578226692E+03 0.33653337532E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 -0.19269775075E+03 0.33434234480E+02 0.26595208036E+03 0.33012237159E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 502 0.20498361398E+02 0.29398294961E+02 0.12436578484E+03 0.12942677855E+03 0.00000000000E+00 0. -1. -54,56c54,56 -< -6 1 1 2 0 505 0.12539878316E+03 0.25084537686E+03 0.17266798312E+03 0.37201006747E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 503 0.91559552940E+02 -0.56451043237E+03 0.74367925168E+03 0.93814391719E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.59178509296E+01 -0.20888672560E+02 0.16637826240E+02 0.27352785287E+02 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.12539878316E+03 0.25084537686E+03 0.17266798312E+03 0.37201006747E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 505 0.91559552940E+02 -0.56451043237E+03 0.74367925168E+03 0.93814391719E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 -0.59178509296E+01 -0.20888672560E+02 0.16637826240E+02 0.27352785287E+02 0.00000000000E+00 0. 1. -86,88c86,88 -< -6 1 1 2 0 504 0.21874707030E+03 0.13546343073E+03 0.25532276652E+03 0.40164578540E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 502 -0.71752123105E+03 -0.62228037306E+02 0.22299504958E+03 0.75394684015E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.11370550817E+03 0.19005264160E+02 -0.20103500671E+03 0.23174385985E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.21874707030E+03 0.13546343073E+03 0.25532276652E+03 0.40164578540E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 501 -0.71752123105E+03 -0.62228037306E+02 0.22299504958E+03 0.75394684015E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.11370550817E+03 0.19005264160E+02 -0.20103500671E+03 0.23174385985E+03 0.00000000000E+00 0. -1. -99,100c99,100 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.80173305346E+03 0.80173305346E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.62467339451E+03 0.62467339451E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.80173305346E+03 0.80173305346E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.62467339451E+03 0.62467339451E+03 0.00000000000E+00 0. -1. -103c103 -< 21 1 1 2 504 502 -0.47480153194E+03 -0.35244666584E+03 0.73446475215E+02 0.59586032903E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 503 -0.47480153194E+03 -0.35244666584E+03 0.73446475215E+02 0.59586032903E+03 0.00000000000E+00 0. -1. -115,116c115,116 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.40182492101E+03 0.40182492101E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.31260679922E+03 0.31260679922E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.40182492101E+03 0.40182492101E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.31260679922E+03 0.31260679922E+03 0.00000000000E+00 0. -1. -120c120 -< 21 1 1 2 505 503 -0.15497496606E+02 0.49927883956E+02 0.10702696535E+02 0.53362099945E+02 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 505 502 -0.15497496606E+02 0.49927883956E+02 0.10702696535E+02 0.53362099945E+02 0.00000000000E+00 0. 1. -131c131 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.96278959232E+03 0.96278959232E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.96278959232E+03 0.96278959232E+03 0.00000000000E+00 0. -1. -134,135c134,135 -< -6 1 1 2 0 503 0.21857372889E+03 0.15145241426E+03 0.45066058276E+02 0.32042512130E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 505 -0.85780113425E+03 0.14260763483E+03 -0.32664683767E+03 0.92890143717E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.21857372889E+03 0.15145241426E+03 0.45066058276E+02 0.32042512130E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 503 -0.85780113425E+03 0.14260763483E+03 -0.32664683767E+03 0.92890143717E+03 0.00000000000E+00 0. -1. -150,152c150,152 -< -6 1 1 2 0 505 0.13931913890E+02 0.17750922096E+03 0.35180277953E+03 0.43057846838E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 -0.68773880518E+02 -0.15629672638E+03 0.29558261724E+03 0.34136138759E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 -0.31808842154E+02 -0.58814069105E+02 0.18302728040E+03 0.19485862191E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.13931913890E+02 0.17750922096E+03 0.35180277953E+03 0.43057846838E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.68773880518E+02 -0.15629672638E+03 0.29558261724E+03 0.34136138759E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 502 -0.31808842154E+02 -0.58814069105E+02 0.18302728040E+03 0.19485862191E+03 0.00000000000E+00 0. 1. -166,168c166,168 -< -6 1 1 2 0 505 0.20706200236E+03 -0.23262072670E+03 -0.78433215568E+03 0.86144820259E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 -0.11364979994E+03 0.36077625749E+03 -0.17519459630E+03 0.41685600819E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 -0.24972583892E+03 0.76450798261E+02 -0.37831215827E+03 0.45970404422E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.20706200236E+03 -0.23262072670E+03 -0.78433215568E+03 0.86144820259E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.11364979994E+03 0.36077625749E+03 -0.17519459630E+03 0.41685600819E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 502 -0.24972583892E+03 0.76450798261E+02 -0.37831215827E+03 0.45970404422E+03 0.00000000000E+00 0. 1. -179,180c179,180 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.98635404392E+02 0.98635404392E+02 0.00000000000E+00 0. -1. -< 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.19738864971E+04 0.19738864971E+04 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.98635404392E+02 0.98635404392E+02 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.19738864971E+04 0.19738864971E+04 0.00000000000E+00 0. 1. -183c183 -< 21 1 1 2 504 502 0.40854483793E+02 -0.12391382476E+03 -0.56339970115E+03 0.57831042535E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 504 503 0.40854483793E+02 -0.12391382476E+03 -0.56339970115E+03 0.57831042535E+03 0.00000000000E+00 0. 1. -195,196c195,196 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.48692914599E+03 0.48692914599E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.42777476019E+03 0.42777476019E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.48692914599E+03 0.48692914599E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.42777476019E+03 0.42777476019E+03 0.00000000000E+00 0. 1. -199,200c199,200 -< 21 1 1 2 504 502 -0.11483456115E+03 -0.18451124114E+03 0.28411941872E+03 0.35770828706E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 0.40040892554E+02 -0.40819720748E+02 -0.34960525373E+02 0.67020601406E+02 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 504 501 -0.11483456115E+03 -0.18451124114E+03 0.28411941872E+03 0.35770828706E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 503 0.40040892554E+02 -0.40819720748E+02 -0.34960525373E+02 0.67020601406E+02 0.00000000000E+00 0. 1. -211,212c211,212 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.59917676920E+03 0.59917676920E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.17083712269E+04 0.17083712269E+04 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.59917676920E+03 0.59917676920E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.17083712269E+04 0.17083712269E+04 0.00000000000E+00 0. -1. -214,216c214,216 -< -6 1 1 2 0 504 -0.10657108230E+02 -0.33922771988E+03 -0.46992979638E+03 0.60493969404E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 0.47981075109E+03 0.61702976492E+03 -0.40969291350E+02 0.78270209563E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.35726018887E+03 -0.31391496917E+03 -0.32619534386E+03 0.57669823372E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 503 -0.10657108230E+02 -0.33922771988E+03 -0.46992979638E+03 0.60493969404E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 0.47981075109E+03 0.61702976492E+03 -0.40969291350E+02 0.78270209563E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.35726018887E+03 -0.31391496917E+03 -0.32619534386E+03 0.57669823372E+03 0.00000000000E+00 0. -1. -230,232c230,232 -< -6 1 1 2 0 505 0.22482853132E+03 -0.61820999175E+02 -0.20240075959E+03 0.35392763656E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 -0.18791203660E+03 0.63561496588E+02 0.15365486866E+03 0.25091993945E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.11487633146E+03 -0.13416331310E+03 0.14095971748E+03 0.22597789287E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.22482853132E+03 -0.61820999175E+02 -0.20240075959E+03 0.35392763656E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.18791203660E+03 0.63561496588E+02 0.15365486866E+03 0.25091993945E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 502 -0.11487633146E+03 -0.13416331310E+03 0.14095971748E+03 0.22597789287E+03 0.00000000000E+00 0. -1. -244c244 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.26737024844E+03 0.26737024844E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.26737024844E+03 0.26737024844E+03 0.00000000000E+00 0. -1. -246,247c246,247 -< -6 1 1 2 0 505 0.28177467352E+02 0.14804686680E+03 0.30235403396E+02 0.23141958442E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 501 -0.21172919086E+02 -0.84430272724E+02 0.46880125199E+01 0.87170757231E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.28177467352E+02 0.14804686680E+03 0.30235403396E+02 0.23141958442E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 505 -0.21172919086E+02 -0.84430272724E+02 0.46880125199E+01 0.87170757231E+02 0.00000000000E+00 0. -1. -291c291 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.61663795165E+03 0.61663795165E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.61663795165E+03 0.61663795165E+03 0.00000000000E+00 0. 1. -294,295c294,295 -< -6 1 1 2 0 503 -0.24303968755E+02 -0.97928126209E+02 0.15074155660E+03 0.25066435264E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 0.12620887057E+03 -0.12706408772E+03 0.46477623488E+02 0.18502467642E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 -0.24303968755E+02 -0.97928126209E+02 0.15074155660E+03 0.25066435264E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 503 0.12620887057E+03 -0.12706408772E+03 0.46477623488E+02 0.18502467642E+03 0.00000000000E+00 0. 1. -324c324 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.22170284178E+03 0.22170284178E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.22170284178E+03 0.22170284178E+03 0.00000000000E+00 0. -1. -327,328c327,328 -< 21 1 1 2 504 505 -0.36346166898E+02 -0.73060382077E+02 -0.96989669211E+02 0.12675117045E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.18381769511E+02 0.14852420334E+03 0.55525657110E+02 0.15962589711E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 501 -0.36346166898E+02 -0.73060382077E+02 -0.96989669211E+02 0.12675117045E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 0.18381769511E+02 0.14852420334E+03 0.55525657110E+02 0.15962589711E+03 0.00000000000E+00 0. -1. -355,356c355,356 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.74721788721E+03 0.74721788721E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.68582026055E+03 0.68582026055E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.74721788721E+03 0.74721788721E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.68582026055E+03 0.68582026055E+03 0.00000000000E+00 0. -1. -360c360 -< 21 1 1 2 505 503 0.36544140559E+02 0.20612509127E+02 0.23512027531E+02 0.48095375873E+02 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 505 502 0.36544140559E+02 0.20612509127E+02 0.23512027531E+02 0.48095375873E+02 0.00000000000E+00 0. 1. -371,372c371,372 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.64237774275E+02 0.64237774275E+02 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.12533760551E+04 0.12533760551E+04 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.64237774275E+02 0.64237774275E+02 0.00000000000E+00 0. 1. -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.12533760551E+04 0.12533760551E+04 0.00000000000E+00 0. 1. -374,375c374,375 -< -6 1 1 2 0 503 0.63988507197E+02 -0.48336095319E+02 -0.43608803810E+03 0.47595449797E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 -0.12221016945E+03 0.51559952731E+02 -0.13503359931E+03 0.18928240062E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.63988507197E+02 -0.48336095319E+02 -0.43608803810E+03 0.47595449797E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 502 -0.12221016945E+03 0.51559952731E+02 -0.13503359931E+03 0.18928240062E+03 0.00000000000E+00 0. 1. -387,388c387,388 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.16316354202E+04 0.16316354202E+04 0.00000000000E+00 0. 1. -< 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.33148240180E+03 0.33148240180E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.16316354202E+04 0.16316354202E+04 0.00000000000E+00 0. 1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.33148240180E+03 0.33148240180E+03 0.00000000000E+00 0. 1. -391,392c391,392 -< 21 1 1 2 504 502 -0.59343447897E+03 -0.34529561999E+02 0.61306875044E+03 0.85393797448E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 0.18686731050E+03 0.20648812836E+03 0.35627777311E+03 0.45220635831E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 504 501 -0.59343447897E+03 -0.34529561999E+02 0.61306875044E+03 0.85393797448E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 503 0.18686731050E+03 0.20648812836E+03 0.35627777311E+03 0.45220635831E+03 0.00000000000E+00 0. 1. -406,408c406,408 -< -6 1 1 2 0 505 -0.21464427393E+03 -0.12110518135E+03 -0.56477133229E+02 0.30636138114E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 0.33290316793E+03 0.37149729646E+03 -0.40518106541E+03 0.64265578365E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 0.16165043624E+03 -0.16767027538E+03 -0.16990350120E+03 0.28829045164E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 -0.21464427393E+03 -0.12110518135E+03 -0.56477133229E+02 0.30636138114E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 0.33290316793E+03 0.37149729646E+03 -0.40518106541E+03 0.64265578365E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 503 0.16165043624E+03 -0.16767027538E+03 -0.16990350120E+03 0.28829045164E+03 0.00000000000E+00 0. 1. -420c420 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.89421845094E+03 0.89421845094E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.89421845094E+03 0.89421845094E+03 0.00000000000E+00 0. 1. -422,423c422,423 -< -6 1 1 2 0 502 -0.54431632830E+02 -0.23085593672E+02 0.29418937980E+02 0.18517618961E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 -0.83256024246E+02 0.77736473587E+02 -0.45416156269E+03 0.46822777569E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 -0.54431632830E+02 -0.23085593672E+02 0.29418937980E+02 0.18517618961E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 502 -0.83256024246E+02 0.77736473587E+02 -0.45416156269E+03 0.46822777569E+03 0.00000000000E+00 0. 1. -438,440c438,440 -< -6 1 1 2 0 504 0.26429476746E+02 0.13107250238E+03 0.83932374694E+03 0.86733607690E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 0.28660701548E+02 -0.67892405610E+02 0.80145852177E+02 0.10887686703E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.61017958141E+02 -0.41225933505E+02 0.21222438665E+02 0.76636549452E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.26429476746E+02 0.13107250238E+03 0.83932374694E+03 0.86733607690E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 0.28660701548E+02 -0.67892405610E+02 0.80145852177E+02 0.10887686703E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.61017958141E+02 -0.41225933505E+02 0.21222438665E+02 0.76636549452E+02 0.00000000000E+00 0. -1. -451,452c451,452 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.60949526212E+03 0.60949526212E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.50457739686E+03 0.50457739686E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.60949526212E+03 0.60949526212E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.50457739686E+03 0.50457739686E+03 0.00000000000E+00 0. -1. -456c456 -< 21 1 1 2 505 503 0.86030814077E+01 -0.47120489445E+02 0.51446776447E+02 0.70293131542E+02 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 505 502 0.86030814077E+01 -0.47120489445E+02 0.51446776447E+02 0.70293131542E+02 0.00000000000E+00 0. -1. -486,488c486,488 -< -6 1 1 2 0 505 0.30886141756E+02 -0.16430824340E+03 -0.84066899690E+03 0.87441667125E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 0.13118182004E+03 0.10973355069E+03 -0.39134396895E+03 0.42708339243E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 0.19024439872E+02 -0.81763658921E+02 -0.21758923733E+03 0.23322157154E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.30886141756E+02 -0.16430824340E+03 -0.84066899690E+03 0.87441667125E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 0.13118182004E+03 0.10973355069E+03 -0.39134396895E+03 0.42708339243E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 0.19024439872E+02 -0.81763658921E+02 -0.21758923733E+03 0.23322157154E+03 0.00000000000E+00 0. -1. -499,500c499,500 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.26577069905E+03 0.26577069905E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.42974946867E+03 0.42974946867E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 501 502 0.00000000000E+00 0.00000000000E+00 0.26577069905E+03 0.26577069905E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.42974946867E+03 0.42974946867E+03 0.00000000000E+00 0. -1. -502,504c502,504 -< -6 1 1 2 0 505 -0.13300555345E+03 -0.11630906445E+03 -0.37665370341E+01 0.24730843601E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 502 -0.43728188693E+02 0.68187246486E+02 -0.38878884523E+01 0.81097291856E+02 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 0.14557470704E+03 -0.12755766678E+02 -0.70110299278E+02 0.16208071748E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 -0.13300555345E+03 -0.11630906445E+03 -0.37665370341E+01 0.24730843601E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 505 -0.43728188693E+02 0.68187246486E+02 -0.38878884523E+01 0.81097291856E+02 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 0.14557470704E+03 -0.12755766678E+02 -0.70110299278E+02 0.16208071748E+03 0.00000000000E+00 0. -1. -515,516c515,516 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.15265142404E+03 0.15265142404E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.67951909106E+03 0.67951909106E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.15265142404E+03 0.15265142404E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.67951909106E+03 0.67951909106E+03 0.00000000000E+00 0. -1. -519,520c519,520 -< 21 1 1 2 504 502 -0.64668073700E+02 -0.22663476572E+02 -0.15548229867E+02 0.70266210787E+02 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.86157343699E+02 0.54861376029E+02 -0.85547240486E+02 0.13323358739E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 501 -0.64668073700E+02 -0.22663476572E+02 -0.15548229867E+02 0.70266210787E+02 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 -0.86157343699E+02 0.54861376029E+02 -0.85547240486E+02 0.13323358739E+03 0.00000000000E+00 0. -1. -531,532c531,532 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.94803553158E+03 0.94803553158E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.30665065164E+03 0.30665065164E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.94803553158E+03 0.94803553158E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.30665065164E+03 0.30665065164E+03 0.00000000000E+00 0. 1. -534c534 -< -6 1 1 2 0 505 0.10212537215E+03 0.29992209329E+03 0.29620049631E+03 0.46695458848E+03 0.17300000000E+03 0. 1. ---- -> -6 1 1 2 0 503 0.10212537215E+03 0.29992209329E+03 0.29620049631E+03 0.46695458848E+03 0.17300000000E+03 0. 1. -536c536 -< 21 1 1 2 505 502 0.11283259483E+03 0.14157613141E+02 0.42722501058E+02 0.12147775337E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 505 504 0.11283259483E+03 0.14157613141E+02 0.42722501058E+02 0.12147775337E+03 0.00000000000E+00 0. 1. -547,548c547,548 -< 21 -1 0 0 501 502 0.00000000000E+00 0.00000000000E+00 0.47606275146E+03 0.47606275146E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.12049860080E+04 0.12049860080E+04 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.47606275146E+03 0.47606275146E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.12049860080E+04 0.12049860080E+04 0.00000000000E+00 0. -1. -550,552c550,552 -< -6 1 1 2 0 505 0.97882649898E+02 0.16297450200E+03 -0.19720597644E+02 0.25779760166E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.28538509898E+03 -0.52873443017E+03 -0.65555368388E+03 0.88924427736E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 0.12295736208E+03 0.67300180088E+02 0.76504849312E+02 0.15968975890E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.97882649898E+02 0.16297450200E+03 -0.19720597644E+02 0.25779760166E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.28538509898E+03 -0.52873443017E+03 -0.65555368388E+03 0.88924427736E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 502 0.12295736208E+03 0.67300180088E+02 0.76504849312E+02 0.15968975890E+03 0.00000000000E+00 0. -1. -566,568c566,568 -< -6 1 1 2 0 504 -0.87875296856E+01 0.19184114946E+03 -0.43964080569E+02 0.26218712341E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 503 -0.14494537025E+03 -0.38971629371E+03 -0.68618045808E+03 0.80232884218E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.48355400491E+01 -0.94154538055E+02 0.12397164811E+02 0.95090216002E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 503 -0.87875296856E+01 0.19184114946E+03 -0.43964080569E+02 0.26218712341E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 501 -0.14494537025E+03 -0.38971629371E+03 -0.68618045808E+03 0.80232884218E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 0.48355400491E+01 -0.94154538055E+02 0.12397164811E+02 0.95090216002E+02 0.00000000000E+00 0. -1. -579,580c579,580 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.77469255466E+03 0.77469255466E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.10813002608E+04 0.10813002608E+04 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.77469255466E+03 0.77469255466E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.10813002608E+04 0.10813002608E+04 0.00000000000E+00 0. -1. -583c583 -< 21 1 1 2 504 502 0.55152957698E+03 -0.65201833850E+03 -0.16583908850E+03 0.86995137295E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 503 0.55152957698E+03 -0.65201833850E+03 -0.16583908850E+03 0.86995137295E+03 0.00000000000E+00 0. -1. -598,600c598,600 -< -6 1 1 2 0 505 0.15718148546E+03 -0.24851431682E+02 0.35856848216E+02 0.23777789341E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 -0.23107467438E+03 -0.16406600020E+03 0.59477379497E+03 0.65883914937E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.88374140735E+02 0.12563521121E+03 0.16770635529E+03 0.22741947289E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.15718148546E+03 -0.24851431682E+02 0.35856848216E+02 0.23777789341E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.23107467438E+03 -0.16406600020E+03 0.59477379497E+03 0.65883914937E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 502 -0.88374140735E+02 0.12563521121E+03 0.16770635529E+03 0.22741947289E+03 0.00000000000E+00 0. 1. -614,616c614,616 -< -6 1 1 2 0 505 0.11116666631E+03 -0.51398409506E+02 0.25242989690E+03 0.32962050459E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.15250226778E+03 -0.11176768196E+02 -0.80605642490E+02 0.17285581109E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.53378502621E+02 -0.36915136015E+02 0.11915764877E+02 0.65984674445E+02 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.11116666631E+03 -0.51398409506E+02 0.25242989690E+03 0.32962050459E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.15250226778E+03 -0.11176768196E+02 -0.80605642490E+02 0.17285581109E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 -0.53378502621E+02 -0.36915136015E+02 0.11915764877E+02 0.65984674445E+02 0.00000000000E+00 0. 1. -628c628 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.30704401996E+03 0.30704401996E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.30704401996E+03 0.30704401996E+03 0.00000000000E+00 0. -1. -631,632c631,632 -< 21 1 1 2 504 505 0.17407121683E+02 -0.45886173803E+03 0.98272161589E+02 0.46958973611E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.11454468903E+03 0.26203389730E+03 0.11151729820E+03 0.30695008865E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 504 501 0.17407121683E+02 -0.45886173803E+03 0.98272161589E+02 0.46958973611E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.11454468903E+03 0.26203389730E+03 0.11151729820E+03 0.30695008865E+03 0.00000000000E+00 0. 1. -659,660c659,660 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.19878691327E+04 0.19878691327E+04 0.00000000000E+00 0. -1. -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.99887434647E+03 0.99887434647E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.19878691327E+04 0.19878691327E+04 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.99887434647E+03 0.99887434647E+03 0.00000000000E+00 0. 1. -662,663c662,663 -< -6 1 1 2 0 502 -0.70772535533E+02 0.36186357491E+03 0.79860421170E+03 0.89646622110E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 505 -0.17681242702E+03 -0.56414514974E+03 -0.77225269752E+03 0.97257216347E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 -0.70772535533E+02 0.36186357491E+03 0.79860421170E+03 0.89646622110E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 503 -0.17681242702E+03 -0.56414514974E+03 -0.77225269752E+03 0.97257216347E+03 0.00000000000E+00 0. 1. -675,676c675,676 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.79907847242E+03 0.79907847242E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.11577536833E+03 0.11577536833E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.79907847242E+03 0.79907847242E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.11577536833E+03 0.11577536833E+03 0.00000000000E+00 0. -1. -678,680c678,680 -< -6 1 1 2 0 503 0.42321248541E+01 -0.89969962167E+02 0.11768892797E+03 0.22779857054E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 0.58019418893E+02 -0.42703378784E+01 0.64376315681E+02 0.86768650877E+02 0.00000000000E+00 0. 1. -< 21 1 1 2 505 501 0.58800425760E+02 0.32754574443E+02 0.18481203843E+02 0.69799048074E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.42321248541E+01 -0.89969962167E+02 0.11768892797E+03 0.22779857054E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 501 0.58019418893E+02 -0.42703378784E+01 0.64376315681E+02 0.86768650877E+02 0.00000000000E+00 0. 1. -> 21 1 1 2 505 504 0.58800425760E+02 0.32754574443E+02 0.18481203843E+02 0.69799048074E+02 0.00000000000E+00 0. -1. -691,692c691,692 -< 21 -1 0 0 501 502 0.00000000000E+00 0.00000000000E+00 0.87478592262E+03 0.87478592262E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.16023103971E+03 0.16023103971E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.87478592262E+03 0.87478592262E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.16023103971E+03 0.16023103971E+03 0.00000000000E+00 0. 1. -694,696c694,696 -< -6 1 1 2 0 505 0.51094356582E+02 -0.40347360213E+02 0.29339796138E+03 0.34677068287E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 503 0.13529064158E+03 0.15714107453E+03 0.10977636523E+03 0.23462251675E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 0.18275955805E+02 0.16416297636E+02 0.52338835483E+02 0.57817463525E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.51094356582E+02 -0.40347360213E+02 0.29339796138E+03 0.34677068287E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 505 0.13529064158E+03 0.15714107453E+03 0.10977636523E+03 0.23462251675E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 502 0.18275955805E+02 0.16416297636E+02 0.52338835483E+02 0.57817463525E+02 0.00000000000E+00 0. -1. -707,708c707,708 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.86193635545E+03 0.86193635545E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.21781596149E+03 0.21781596149E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.86193635545E+03 0.86193635545E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.21781596149E+03 0.21781596149E+03 0.00000000000E+00 0. -1. -712c712 -< 21 1 1 2 505 502 -0.32336112435E+02 -0.88941145944E+02 0.17880896684E+03 0.20230867068E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 505 503 -0.32336112435E+02 -0.88941145944E+02 0.17880896684E+03 0.20230867068E+03 0.00000000000E+00 0. 1. -723,724c723,724 -< 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.17158840285E+04 0.17158840285E+04 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.19926995597E+03 0.19926995597E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.17158840285E+04 0.17158840285E+04 0.00000000000E+00 0. -1. -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.19926995597E+03 0.19926995597E+03 0.00000000000E+00 0. -1. -726,728c726,728 -< -6 1 1 2 0 504 0.13494095331E+03 0.12732567530E+03 0.41986177104E+03 0.49054438662E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.36929525166E+03 0.55262020840E+02 0.20540407587E+03 0.42617333121E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.68446901696E+02 -0.16199453235E+03 0.19946776735E+03 0.26592216356E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.13494095331E+03 0.12732567530E+03 0.41986177104E+03 0.49054438662E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 -0.36929525166E+03 0.55262020840E+02 0.20540407587E+03 0.42617333121E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 0.68446901696E+02 -0.16199453235E+03 0.19946776735E+03 0.26592216356E+03 0.00000000000E+00 0. -1. -740c740 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.20717130236E+04 0.20717130236E+04 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.20717130236E+04 0.20717130236E+04 0.00000000000E+00 0. -1. -743,744c743,744 -< 21 1 1 2 504 505 -0.51917617014E+03 -0.41711433140E+03 -0.10951499650E+04 0.12817494712E+04 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.38623106687E+02 -0.16179945446E+03 -0.19265782928E+03 0.25453456939E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 501 -0.51917617014E+03 -0.41711433140E+03 -0.10951499650E+04 0.12817494712E+04 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.38623106687E+02 -0.16179945446E+03 -0.19265782928E+03 0.25453456939E+03 0.00000000000E+00 0. -1. -758,760c758,760 -< -6 1 1 2 0 504 0.99857720003E+02 -0.10433176492E+01 -0.72706788852E+03 0.75400886419E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.44329997817E+03 0.91326222484E+02 -0.65904825990E+03 0.79949981766E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 501 0.38402699303E+02 -0.12844982940E+03 -0.14851741956E+03 0.20007885920E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 503 0.99857720003E+02 -0.10433176492E+01 -0.72706788852E+03 0.75400886419E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 -0.44329997817E+03 0.91326222484E+02 -0.65904825990E+03 0.79949981766E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 504 0.38402699303E+02 -0.12844982940E+03 -0.14851741956E+03 0.20007885920E+03 0.00000000000E+00 0. 1. -774,776c774,776 -< -6 1 1 2 0 504 0.16855730097E+03 0.29610135927E+00 -0.32785897876E+03 0.40722495177E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 502 -0.42203367704E+02 -0.12545475516E+03 -0.26604125354E+02 0.13501036747E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.10399608402E+03 -0.15451437807E+02 -0.65273222246E+02 0.12375187256E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.16855730097E+03 0.29610135927E+00 -0.32785897876E+03 0.40722495177E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 501 -0.42203367704E+02 -0.12545475516E+03 -0.26604125354E+02 0.13501036747E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 0.10399608402E+03 -0.15451437807E+02 -0.65273222246E+02 0.12375187256E+03 0.00000000000E+00 0. -1. -ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803998544755928E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 58.5567s + [COUNTERS] Fortran Overhead ( 0 ) : 6.9220s + [COUNTERS] CudaCpp MEs ( 2 ) : 51.6347s for 90112 events => throughput is 1.75E+03 events/s + +*** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803998544755928E-004) differ by less than 2E-4 (3.09140099097327e-06) + +*** (2-none) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.800809e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.801856e+03 ) sec^-1 + +*** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277357563876386E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 2.7520s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5598s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1922s for 8192 events => throughput is 6.87E+03 events/s + +*** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277357563876386E-004) differ by less than 2E-4 (3.0771233283655164e-06) + +*** (2-sse4) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803992549649203E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 16.6713s + [COUNTERS] Fortran Overhead ( 0 ) : 3.5388s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.1325s for 90112 events => throughput is 6.86E+03 events/s + +*** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803992549649203E-004) differ by less than 2E-4 (2.7120586845175154e-06) + +*** (2-sse4) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.024806e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.034343e+03 ) sec^-1 + +*** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277361789689938E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 1.6046s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9904s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6143s for 8192 events => throughput is 1.33E+04 events/s + +*** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277361789689938E-004) differ by less than 2E-4 (3.1936099427198883e-06) + +*** (2-avx2) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803994805545430E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 9.7335s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9730s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.7605s for 90112 events => throughput is 1.33E+04 events/s + +*** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803994805545430E-004) differ by less than 2E-4 (2.8548012438456283e-06) + +*** (2-avx2) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.373619e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.372146e+04 ) sec^-1 + +*** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277361789689938E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 1.4714s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9262s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5452s for 8192 events => throughput is 1.50E+04 events/s + +*** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277361789689938E-004) differ by less than 2E-4 (3.1936099427198883e-06) + +*** (2-512y) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803994805545430E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 8.9081s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9041s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.0040s for 90112 events => throughput is 1.50E+04 events/s + +*** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803994805545430E-004) differ by less than 2E-4 (2.8548012438456283e-06) + +*** (2-512y) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.542030e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.543882e+04 ) sec^-1 + +*** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277366834693741E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 1.6636s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0254s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6382s for 8192 events => throughput is 1.28E+04 events/s + +*** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277366834693741E-004) differ by less than 2E-4 (3.332677936285222e-06) + +*** (2-512z) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803998089876381E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 10.0386s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0111s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.0275s for 90112 events => throughput is 1.28E+04 events/s + +*** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803998089876381E-004) differ by less than 2E-4 (3.0626183411541774e-06) + +*** (2-512z) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.302119e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.299011e+04 ) sec^-1 + +*** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_f_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277369095377715E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 0.7302s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0216s for 8192 events => throughput is 3.79E+05 events/s + +*** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277369095377715E-004) differ by less than 2E-4 (3.3949947939593272e-06) + +*** (3) Compare GMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE GMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_f_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5804003934173863E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 3.0967s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8585s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2382s for 90112 events => throughput is 3.78E+05 events/s + +*** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5804003934173863E-004) differ by less than 2E-4 (3.432418144377891e-06) + +*** (3) Compare GMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.540271e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.875593e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.443783e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.609817e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.421012e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.605300e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.442276e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.506482e+05 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 5294b29eab..9cfa246448 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -2,36 +2,36 @@ Working directory (build): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_11:28:43 +DATE: 2022-12-18_02:44:27 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -55,9 +55,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/o [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 5.1006s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2835s - [COUNTERS] Fortran MEs ( 1 ) : 4.8171s for 8192 events => throughput is 1.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0925s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2857s + [COUNTERS] Fortran MEs ( 1 ) : 4.8068s for 8192 events => throughput is 1.70E+03 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/o [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277245934316629E-004] fbridge_mode=0 [UNWEIGHT] Wrote 49 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 5.2011s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3911s - [COUNTERS] Fortran MEs ( 1 ) : 4.8100s for 8192 events => throughput is 1.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.1998s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3926s + [COUNTERS] Fortran MEs ( 1 ) : 4.8072s for 8192 events => throughput is 1.70E+03 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803949688410202E-004] fbridge_mode=0 [UNWEIGHT] Wrote 204 events (found 1633 events) - [COUNTERS] PROGRAM TOTAL : 55.3357s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3787s - [COUNTERS] Fortran MEs ( 1 ) : 52.9570s for 90112 events => throughput is 1.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 55.2757s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3801s + [COUNTERS] Fortran MEs ( 1 ) : 52.8956s for 90112 events => throughput is 1.70E+03 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,429 +127,458 @@ Executing ' ./build.none_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277246055886695E-004] fbridge_mode=1 [UNWEIGHT] Wrote 49 events (found 738 events) - [COUNTERS] PROGRAM TOTAL : 10.0393s - [COUNTERS] Fortran Overhead ( 0 ) : 5.1471s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.8922s for 8192 events => throughput is 1.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 10.0389s + [COUNTERS] Fortran Overhead ( 0 ) : 5.1480s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.8909s for 8192 events => throughput is 1.67E+03 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277246055886695E-004) differ by less than 2E-4 (3.3511382557804836e-09) *** (2-none) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** -3,4c3,4 -< 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.13289043826E+04 0.13289043826E+04 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.81223316322E+02 0.81223316322E+02 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.13289043826E+04 0.13289043826E+04 0.00000000000E+00 0. -1. -> 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.81223316322E+02 0.81223316322E+02 0.00000000000E+00 0. -1. -6,8c6,8 -< -6 1 1 2 0 504 0.39403209480E+02 -0.10079469096E+02 0.28578226692E+03 0.33653337532E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.19269775075E+03 0.33434234480E+02 0.26595208036E+03 0.33012237159E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.20498361398E+02 0.29398294961E+02 0.12436578484E+03 0.12942677855E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 505 0.39403209480E+02 -0.10079469096E+02 0.28578226692E+03 0.33653337532E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 -0.19269775075E+03 0.33434234480E+02 0.26595208036E+03 0.33012237159E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 502 0.20498361398E+02 0.29398294961E+02 0.12436578484E+03 0.12942677855E+03 0.00000000000E+00 0. -1. -54,56c54,56 -< -6 1 1 2 0 505 0.12539878316E+03 0.25084537686E+03 0.17266798312E+03 0.37201006747E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 503 0.91559552940E+02 -0.56451043237E+03 0.74367925168E+03 0.93814391719E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.59178509296E+01 -0.20888672560E+02 0.16637826240E+02 0.27352785287E+02 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.12539878316E+03 0.25084537686E+03 0.17266798312E+03 0.37201006747E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 505 0.91559552940E+02 -0.56451043237E+03 0.74367925168E+03 0.93814391719E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 -0.59178509296E+01 -0.20888672560E+02 0.16637826240E+02 0.27352785287E+02 0.00000000000E+00 0. 1. -86,88c86,88 -< -6 1 1 2 0 504 0.21874707030E+03 0.13546343073E+03 0.25532276652E+03 0.40164578540E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 502 -0.71752123105E+03 -0.62228037306E+02 0.22299504958E+03 0.75394684015E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.11370550817E+03 0.19005264160E+02 -0.20103500671E+03 0.23174385985E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.21874707030E+03 0.13546343073E+03 0.25532276652E+03 0.40164578540E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 501 -0.71752123105E+03 -0.62228037306E+02 0.22299504958E+03 0.75394684015E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.11370550817E+03 0.19005264160E+02 -0.20103500671E+03 0.23174385985E+03 0.00000000000E+00 0. -1. -99,100c99,100 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.80173305346E+03 0.80173305346E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.62467339451E+03 0.62467339451E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.80173305346E+03 0.80173305346E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.62467339451E+03 0.62467339451E+03 0.00000000000E+00 0. -1. -103c103 -< 21 1 1 2 504 502 -0.47480153194E+03 -0.35244666584E+03 0.73446475215E+02 0.59586032903E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 503 -0.47480153194E+03 -0.35244666584E+03 0.73446475215E+02 0.59586032903E+03 0.00000000000E+00 0. -1. -115,116c115,116 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.40182492101E+03 0.40182492101E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.31260679922E+03 0.31260679922E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.40182492101E+03 0.40182492101E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.31260679922E+03 0.31260679922E+03 0.00000000000E+00 0. -1. -120c120 -< 21 1 1 2 505 503 -0.15497496606E+02 0.49927883956E+02 0.10702696535E+02 0.53362099945E+02 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 505 502 -0.15497496606E+02 0.49927883956E+02 0.10702696535E+02 0.53362099945E+02 0.00000000000E+00 0. 1. -131c131 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.96278959232E+03 0.96278959232E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.96278959232E+03 0.96278959232E+03 0.00000000000E+00 0. -1. -134,135c134,135 -< -6 1 1 2 0 503 0.21857372889E+03 0.15145241426E+03 0.45066058276E+02 0.32042512130E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 505 -0.85780113425E+03 0.14260763483E+03 -0.32664683767E+03 0.92890143717E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.21857372889E+03 0.15145241426E+03 0.45066058276E+02 0.32042512130E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 503 -0.85780113425E+03 0.14260763483E+03 -0.32664683767E+03 0.92890143717E+03 0.00000000000E+00 0. -1. -150,152c150,152 -< -6 1 1 2 0 505 0.13931913890E+02 0.17750922096E+03 0.35180277953E+03 0.43057846838E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 -0.68773880518E+02 -0.15629672638E+03 0.29558261724E+03 0.34136138759E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 -0.31808842154E+02 -0.58814069105E+02 0.18302728040E+03 0.19485862191E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.13931913890E+02 0.17750922096E+03 0.35180277953E+03 0.43057846838E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.68773880518E+02 -0.15629672638E+03 0.29558261724E+03 0.34136138759E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 502 -0.31808842154E+02 -0.58814069105E+02 0.18302728040E+03 0.19485862191E+03 0.00000000000E+00 0. 1. -166,168c166,168 -< -6 1 1 2 0 505 0.20706200236E+03 -0.23262072670E+03 -0.78433215568E+03 0.86144820259E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 -0.11364979994E+03 0.36077625749E+03 -0.17519459630E+03 0.41685600819E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 -0.24972583892E+03 0.76450798261E+02 -0.37831215827E+03 0.45970404422E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.20706200236E+03 -0.23262072670E+03 -0.78433215568E+03 0.86144820259E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.11364979994E+03 0.36077625749E+03 -0.17519459630E+03 0.41685600819E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 502 -0.24972583892E+03 0.76450798261E+02 -0.37831215827E+03 0.45970404422E+03 0.00000000000E+00 0. 1. -179,180c179,180 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.98635404392E+02 0.98635404392E+02 0.00000000000E+00 0. -1. -< 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.19738864971E+04 0.19738864971E+04 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.98635404392E+02 0.98635404392E+02 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.19738864971E+04 0.19738864971E+04 0.00000000000E+00 0. 1. -183c183 -< 21 1 1 2 504 502 0.40854483793E+02 -0.12391382476E+03 -0.56339970115E+03 0.57831042535E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 504 503 0.40854483793E+02 -0.12391382476E+03 -0.56339970115E+03 0.57831042535E+03 0.00000000000E+00 0. 1. -195,196c195,196 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.48692914599E+03 0.48692914599E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.42777476019E+03 0.42777476019E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.48692914599E+03 0.48692914599E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.42777476019E+03 0.42777476019E+03 0.00000000000E+00 0. 1. -199,200c199,200 -< 21 1 1 2 504 502 -0.11483456115E+03 -0.18451124114E+03 0.28411941872E+03 0.35770828706E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 0.40040892554E+02 -0.40819720748E+02 -0.34960525373E+02 0.67020601406E+02 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 504 501 -0.11483456115E+03 -0.18451124114E+03 0.28411941872E+03 0.35770828706E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 503 0.40040892554E+02 -0.40819720748E+02 -0.34960525373E+02 0.67020601406E+02 0.00000000000E+00 0. 1. -211,212c211,212 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.59917676920E+03 0.59917676920E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.17083712269E+04 0.17083712269E+04 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.59917676920E+03 0.59917676920E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.17083712269E+04 0.17083712269E+04 0.00000000000E+00 0. -1. -214,216c214,216 -< -6 1 1 2 0 504 -0.10657108230E+02 -0.33922771988E+03 -0.46992979638E+03 0.60493969404E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 0.47981075109E+03 0.61702976492E+03 -0.40969291350E+02 0.78270209563E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.35726018887E+03 -0.31391496917E+03 -0.32619534386E+03 0.57669823372E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 503 -0.10657108230E+02 -0.33922771988E+03 -0.46992979638E+03 0.60493969404E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 0.47981075109E+03 0.61702976492E+03 -0.40969291350E+02 0.78270209563E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.35726018887E+03 -0.31391496917E+03 -0.32619534386E+03 0.57669823372E+03 0.00000000000E+00 0. -1. -230,232c230,232 -< -6 1 1 2 0 505 0.22482853132E+03 -0.61820999175E+02 -0.20240075959E+03 0.35392763656E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 -0.18791203660E+03 0.63561496588E+02 0.15365486866E+03 0.25091993945E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.11487633146E+03 -0.13416331310E+03 0.14095971748E+03 0.22597789287E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.22482853132E+03 -0.61820999175E+02 -0.20240075959E+03 0.35392763656E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.18791203660E+03 0.63561496588E+02 0.15365486866E+03 0.25091993945E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 502 -0.11487633146E+03 -0.13416331310E+03 0.14095971748E+03 0.22597789287E+03 0.00000000000E+00 0. -1. -244c244 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.26737024844E+03 0.26737024844E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.26737024844E+03 0.26737024844E+03 0.00000000000E+00 0. -1. -246,247c246,247 -< -6 1 1 2 0 505 0.28177467352E+02 0.14804686680E+03 0.30235403396E+02 0.23141958442E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 501 -0.21172919086E+02 -0.84430272724E+02 0.46880125199E+01 0.87170757231E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.28177467352E+02 0.14804686680E+03 0.30235403396E+02 0.23141958442E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 505 -0.21172919086E+02 -0.84430272724E+02 0.46880125199E+01 0.87170757231E+02 0.00000000000E+00 0. -1. -291c291 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.61663795165E+03 0.61663795165E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.61663795165E+03 0.61663795165E+03 0.00000000000E+00 0. 1. -294,295c294,295 -< -6 1 1 2 0 503 -0.24303968755E+02 -0.97928126209E+02 0.15074155660E+03 0.25066435264E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 0.12620887057E+03 -0.12706408772E+03 0.46477623488E+02 0.18502467642E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 -0.24303968755E+02 -0.97928126209E+02 0.15074155660E+03 0.25066435264E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 503 0.12620887057E+03 -0.12706408772E+03 0.46477623488E+02 0.18502467642E+03 0.00000000000E+00 0. 1. -324c324 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.22170284178E+03 0.22170284178E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.22170284178E+03 0.22170284178E+03 0.00000000000E+00 0. -1. -327,328c327,328 -< 21 1 1 2 504 505 -0.36346166898E+02 -0.73060382077E+02 -0.96989669211E+02 0.12675117045E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.18381769511E+02 0.14852420334E+03 0.55525657110E+02 0.15962589711E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 501 -0.36346166898E+02 -0.73060382077E+02 -0.96989669211E+02 0.12675117045E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 0.18381769511E+02 0.14852420334E+03 0.55525657110E+02 0.15962589711E+03 0.00000000000E+00 0. -1. -355,356c355,356 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.74721788721E+03 0.74721788721E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.68582026055E+03 0.68582026055E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.74721788721E+03 0.74721788721E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.68582026055E+03 0.68582026055E+03 0.00000000000E+00 0. -1. -360c360 -< 21 1 1 2 505 503 0.36544140559E+02 0.20612509127E+02 0.23512027531E+02 0.48095375873E+02 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 505 502 0.36544140559E+02 0.20612509127E+02 0.23512027531E+02 0.48095375873E+02 0.00000000000E+00 0. 1. -371,372c371,372 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.64237774275E+02 0.64237774275E+02 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.12533760551E+04 0.12533760551E+04 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.64237774275E+02 0.64237774275E+02 0.00000000000E+00 0. 1. -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.12533760551E+04 0.12533760551E+04 0.00000000000E+00 0. 1. -374,375c374,375 -< -6 1 1 2 0 503 0.63988507197E+02 -0.48336095319E+02 -0.43608803810E+03 0.47595449797E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 -0.12221016945E+03 0.51559952731E+02 -0.13503359931E+03 0.18928240062E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.63988507197E+02 -0.48336095319E+02 -0.43608803810E+03 0.47595449797E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 502 -0.12221016945E+03 0.51559952731E+02 -0.13503359931E+03 0.18928240062E+03 0.00000000000E+00 0. 1. -387,388c387,388 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.16316354202E+04 0.16316354202E+04 0.00000000000E+00 0. 1. -< 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.33148240180E+03 0.33148240180E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.16316354202E+04 0.16316354202E+04 0.00000000000E+00 0. 1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.33148240180E+03 0.33148240180E+03 0.00000000000E+00 0. 1. -391,392c391,392 -< 21 1 1 2 504 502 -0.59343447897E+03 -0.34529561999E+02 0.61306875044E+03 0.85393797448E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 0.18686731050E+03 0.20648812836E+03 0.35627777311E+03 0.45220635831E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 504 501 -0.59343447897E+03 -0.34529561999E+02 0.61306875044E+03 0.85393797448E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 503 0.18686731050E+03 0.20648812836E+03 0.35627777311E+03 0.45220635831E+03 0.00000000000E+00 0. 1. -406,408c406,408 -< -6 1 1 2 0 505 -0.21464427393E+03 -0.12110518135E+03 -0.56477133229E+02 0.30636138114E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 0.33290316793E+03 0.37149729646E+03 -0.40518106541E+03 0.64265578365E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 0.16165043624E+03 -0.16767027538E+03 -0.16990350120E+03 0.28829045164E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 -0.21464427393E+03 -0.12110518135E+03 -0.56477133229E+02 0.30636138114E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 0.33290316793E+03 0.37149729646E+03 -0.40518106541E+03 0.64265578365E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 503 0.16165043624E+03 -0.16767027538E+03 -0.16990350120E+03 0.28829045164E+03 0.00000000000E+00 0. 1. -420c420 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.89421845094E+03 0.89421845094E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.89421845094E+03 0.89421845094E+03 0.00000000000E+00 0. 1. -422,423c422,423 -< -6 1 1 2 0 502 -0.54431632830E+02 -0.23085593672E+02 0.29418937980E+02 0.18517618961E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 -0.83256024246E+02 0.77736473587E+02 -0.45416156269E+03 0.46822777569E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 -0.54431632830E+02 -0.23085593672E+02 0.29418937980E+02 0.18517618961E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 502 -0.83256024246E+02 0.77736473587E+02 -0.45416156269E+03 0.46822777569E+03 0.00000000000E+00 0. 1. -438,440c438,440 -< -6 1 1 2 0 504 0.26429476746E+02 0.13107250238E+03 0.83932374694E+03 0.86733607690E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 0.28660701548E+02 -0.67892405610E+02 0.80145852177E+02 0.10887686703E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.61017958141E+02 -0.41225933505E+02 0.21222438665E+02 0.76636549452E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.26429476746E+02 0.13107250238E+03 0.83932374694E+03 0.86733607690E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 0.28660701548E+02 -0.67892405610E+02 0.80145852177E+02 0.10887686703E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.61017958141E+02 -0.41225933505E+02 0.21222438665E+02 0.76636549452E+02 0.00000000000E+00 0. -1. -451,452c451,452 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.60949526212E+03 0.60949526212E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.50457739686E+03 0.50457739686E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.60949526212E+03 0.60949526212E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.50457739686E+03 0.50457739686E+03 0.00000000000E+00 0. -1. -456c456 -< 21 1 1 2 505 503 0.86030814077E+01 -0.47120489445E+02 0.51446776447E+02 0.70293131542E+02 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 505 502 0.86030814077E+01 -0.47120489445E+02 0.51446776447E+02 0.70293131542E+02 0.00000000000E+00 0. -1. -486,488c486,488 -< -6 1 1 2 0 505 0.30886141756E+02 -0.16430824340E+03 -0.84066899690E+03 0.87441667125E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 0.13118182004E+03 0.10973355069E+03 -0.39134396895E+03 0.42708339243E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 0.19024439872E+02 -0.81763658921E+02 -0.21758923733E+03 0.23322157154E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.30886141756E+02 -0.16430824340E+03 -0.84066899690E+03 0.87441667125E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 0.13118182004E+03 0.10973355069E+03 -0.39134396895E+03 0.42708339243E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 0.19024439872E+02 -0.81763658921E+02 -0.21758923733E+03 0.23322157154E+03 0.00000000000E+00 0. -1. -499,500c499,500 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.26577069905E+03 0.26577069905E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.42974946867E+03 0.42974946867E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 501 502 0.00000000000E+00 0.00000000000E+00 0.26577069905E+03 0.26577069905E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.42974946867E+03 0.42974946867E+03 0.00000000000E+00 0. -1. -502,504c502,504 -< -6 1 1 2 0 505 -0.13300555345E+03 -0.11630906445E+03 -0.37665370341E+01 0.24730843601E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 502 -0.43728188693E+02 0.68187246486E+02 -0.38878884523E+01 0.81097291856E+02 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 0.14557470704E+03 -0.12755766678E+02 -0.70110299278E+02 0.16208071748E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 -0.13300555345E+03 -0.11630906445E+03 -0.37665370341E+01 0.24730843601E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 505 -0.43728188693E+02 0.68187246486E+02 -0.38878884523E+01 0.81097291856E+02 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 0.14557470704E+03 -0.12755766678E+02 -0.70110299278E+02 0.16208071748E+03 0.00000000000E+00 0. -1. -515,516c515,516 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.15265142404E+03 0.15265142404E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.67951909106E+03 0.67951909106E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.15265142404E+03 0.15265142404E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.67951909106E+03 0.67951909106E+03 0.00000000000E+00 0. -1. -519,520c519,520 -< 21 1 1 2 504 502 -0.64668073700E+02 -0.22663476572E+02 -0.15548229867E+02 0.70266210787E+02 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.86157343699E+02 0.54861376029E+02 -0.85547240486E+02 0.13323358739E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 501 -0.64668073700E+02 -0.22663476572E+02 -0.15548229867E+02 0.70266210787E+02 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 -0.86157343699E+02 0.54861376029E+02 -0.85547240486E+02 0.13323358739E+03 0.00000000000E+00 0. -1. -531,532c531,532 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.94803553158E+03 0.94803553158E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.30665065164E+03 0.30665065164E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.94803553158E+03 0.94803553158E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.30665065164E+03 0.30665065164E+03 0.00000000000E+00 0. 1. -534c534 -< -6 1 1 2 0 505 0.10212537215E+03 0.29992209329E+03 0.29620049631E+03 0.46695458848E+03 0.17300000000E+03 0. 1. ---- -> -6 1 1 2 0 503 0.10212537215E+03 0.29992209329E+03 0.29620049631E+03 0.46695458848E+03 0.17300000000E+03 0. 1. -536c536 -< 21 1 1 2 505 502 0.11283259483E+03 0.14157613141E+02 0.42722501058E+02 0.12147775337E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 505 504 0.11283259483E+03 0.14157613141E+02 0.42722501058E+02 0.12147775337E+03 0.00000000000E+00 0. 1. -547,548c547,548 -< 21 -1 0 0 501 502 0.00000000000E+00 0.00000000000E+00 0.47606275146E+03 0.47606275146E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.12049860080E+04 0.12049860080E+04 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.47606275146E+03 0.47606275146E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.12049860080E+04 0.12049860080E+04 0.00000000000E+00 0. -1. -550,552c550,552 -< -6 1 1 2 0 505 0.97882649898E+02 0.16297450200E+03 -0.19720597644E+02 0.25779760166E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.28538509898E+03 -0.52873443017E+03 -0.65555368388E+03 0.88924427736E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 0.12295736208E+03 0.67300180088E+02 0.76504849312E+02 0.15968975890E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.97882649898E+02 0.16297450200E+03 -0.19720597644E+02 0.25779760166E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.28538509898E+03 -0.52873443017E+03 -0.65555368388E+03 0.88924427736E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 502 0.12295736208E+03 0.67300180088E+02 0.76504849312E+02 0.15968975890E+03 0.00000000000E+00 0. -1. -566,568c566,568 -< -6 1 1 2 0 504 -0.87875296856E+01 0.19184114946E+03 -0.43964080569E+02 0.26218712341E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 503 -0.14494537025E+03 -0.38971629371E+03 -0.68618045808E+03 0.80232884218E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.48355400491E+01 -0.94154538055E+02 0.12397164811E+02 0.95090216002E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 503 -0.87875296856E+01 0.19184114946E+03 -0.43964080569E+02 0.26218712341E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 501 -0.14494537025E+03 -0.38971629371E+03 -0.68618045808E+03 0.80232884218E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 0.48355400491E+01 -0.94154538055E+02 0.12397164811E+02 0.95090216002E+02 0.00000000000E+00 0. -1. -579,580c579,580 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.77469255466E+03 0.77469255466E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.10813002608E+04 0.10813002608E+04 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.77469255466E+03 0.77469255466E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.10813002608E+04 0.10813002608E+04 0.00000000000E+00 0. -1. -583c583 -< 21 1 1 2 504 502 0.55152957698E+03 -0.65201833850E+03 -0.16583908850E+03 0.86995137295E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 503 0.55152957698E+03 -0.65201833850E+03 -0.16583908850E+03 0.86995137295E+03 0.00000000000E+00 0. -1. -598,600c598,600 -< -6 1 1 2 0 505 0.15718148546E+03 -0.24851431682E+02 0.35856848216E+02 0.23777789341E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 502 -0.23107467438E+03 -0.16406600020E+03 0.59477379497E+03 0.65883914937E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.88374140735E+02 0.12563521121E+03 0.16770635529E+03 0.22741947289E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.15718148546E+03 -0.24851431682E+02 0.35856848216E+02 0.23777789341E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.23107467438E+03 -0.16406600020E+03 0.59477379497E+03 0.65883914937E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 502 -0.88374140735E+02 0.12563521121E+03 0.16770635529E+03 0.22741947289E+03 0.00000000000E+00 0. 1. -614,616c614,616 -< -6 1 1 2 0 505 0.11116666631E+03 -0.51398409506E+02 0.25242989690E+03 0.32962050459E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.15250226778E+03 -0.11176768196E+02 -0.80605642490E+02 0.17285581109E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 504 -0.53378502621E+02 -0.36915136015E+02 0.11915764877E+02 0.65984674445E+02 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 0.11116666631E+03 -0.51398409506E+02 0.25242989690E+03 0.32962050459E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 505 -0.15250226778E+03 -0.11176768196E+02 -0.80605642490E+02 0.17285581109E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 503 -0.53378502621E+02 -0.36915136015E+02 0.11915764877E+02 0.65984674445E+02 0.00000000000E+00 0. 1. -628c628 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.30704401996E+03 0.30704401996E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.30704401996E+03 0.30704401996E+03 0.00000000000E+00 0. -1. -631,632c631,632 -< 21 1 1 2 504 505 0.17407121683E+02 -0.45886173803E+03 0.98272161589E+02 0.46958973611E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.11454468903E+03 0.26203389730E+03 0.11151729820E+03 0.30695008865E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 504 501 0.17407121683E+02 -0.45886173803E+03 0.98272161589E+02 0.46958973611E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.11454468903E+03 0.26203389730E+03 0.11151729820E+03 0.30695008865E+03 0.00000000000E+00 0. 1. -659,660c659,660 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.19878691327E+04 0.19878691327E+04 0.00000000000E+00 0. -1. -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.99887434647E+03 0.99887434647E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.19878691327E+04 0.19878691327E+04 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.99887434647E+03 0.99887434647E+03 0.00000000000E+00 0. 1. -662,663c662,663 -< -6 1 1 2 0 502 -0.70772535533E+02 0.36186357491E+03 0.79860421170E+03 0.89646622110E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 505 -0.17681242702E+03 -0.56414514974E+03 -0.77225269752E+03 0.97257216347E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 504 -0.70772535533E+02 0.36186357491E+03 0.79860421170E+03 0.89646622110E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 503 -0.17681242702E+03 -0.56414514974E+03 -0.77225269752E+03 0.97257216347E+03 0.00000000000E+00 0. 1. -675,676c675,676 -< 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.79907847242E+03 0.79907847242E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.11577536833E+03 0.11577536833E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.79907847242E+03 0.79907847242E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.11577536833E+03 0.11577536833E+03 0.00000000000E+00 0. -1. -678,680c678,680 -< -6 1 1 2 0 503 0.42321248541E+01 -0.89969962167E+02 0.11768892797E+03 0.22779857054E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 505 0.58019418893E+02 -0.42703378784E+01 0.64376315681E+02 0.86768650877E+02 0.00000000000E+00 0. 1. -< 21 1 1 2 505 501 0.58800425760E+02 0.32754574443E+02 0.18481203843E+02 0.69799048074E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.42321248541E+01 -0.89969962167E+02 0.11768892797E+03 0.22779857054E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 501 0.58019418893E+02 -0.42703378784E+01 0.64376315681E+02 0.86768650877E+02 0.00000000000E+00 0. 1. -> 21 1 1 2 505 504 0.58800425760E+02 0.32754574443E+02 0.18481203843E+02 0.69799048074E+02 0.00000000000E+00 0. -1. -691,692c691,692 -< 21 -1 0 0 501 502 0.00000000000E+00 0.00000000000E+00 0.87478592262E+03 0.87478592262E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.16023103971E+03 0.16023103971E+03 0.00000000000E+00 0. 1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.87478592262E+03 0.87478592262E+03 0.00000000000E+00 0. 1. -> 21 -1 0 0 501 503 -0.00000000000E+00 -0.00000000000E+00 -0.16023103971E+03 0.16023103971E+03 0.00000000000E+00 0. 1. -694,696c694,696 -< -6 1 1 2 0 505 0.51094356582E+02 -0.40347360213E+02 0.29339796138E+03 0.34677068287E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 503 0.13529064158E+03 0.15714107453E+03 0.10977636523E+03 0.23462251675E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 504 0.18275955805E+02 0.16416297636E+02 0.52338835483E+02 0.57817463525E+02 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 504 0.51094356582E+02 -0.40347360213E+02 0.29339796138E+03 0.34677068287E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 505 0.13529064158E+03 0.15714107453E+03 0.10977636523E+03 0.23462251675E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 502 0.18275955805E+02 0.16416297636E+02 0.52338835483E+02 0.57817463525E+02 0.00000000000E+00 0. -1. -707,708c707,708 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.86193635545E+03 0.86193635545E+03 0.00000000000E+00 0. -1. -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.21781596149E+03 0.21781596149E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 504 502 0.00000000000E+00 0.00000000000E+00 0.86193635545E+03 0.86193635545E+03 0.00000000000E+00 0. -1. -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.21781596149E+03 0.21781596149E+03 0.00000000000E+00 0. -1. -712c712 -< 21 1 1 2 505 502 -0.32336112435E+02 -0.88941145944E+02 0.17880896684E+03 0.20230867068E+03 0.00000000000E+00 0. 1. ---- -> 21 1 1 2 505 503 -0.32336112435E+02 -0.88941145944E+02 0.17880896684E+03 0.20230867068E+03 0.00000000000E+00 0. 1. -723,724c723,724 -< 21 -1 0 0 505 502 0.00000000000E+00 0.00000000000E+00 0.17158840285E+04 0.17158840285E+04 0.00000000000E+00 0. -1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.19926995597E+03 0.19926995597E+03 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.17158840285E+04 0.17158840285E+04 0.00000000000E+00 0. -1. -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.19926995597E+03 0.19926995597E+03 0.00000000000E+00 0. -1. -726,728c726,728 -< -6 1 1 2 0 504 0.13494095331E+03 0.12732567530E+03 0.41986177104E+03 0.49054438662E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.36929525166E+03 0.55262020840E+02 0.20540407587E+03 0.42617333121E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.68446901696E+02 -0.16199453235E+03 0.19946776735E+03 0.26592216356E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.13494095331E+03 0.12732567530E+03 0.41986177104E+03 0.49054438662E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 -0.36929525166E+03 0.55262020840E+02 0.20540407587E+03 0.42617333121E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 0.68446901696E+02 -0.16199453235E+03 0.19946776735E+03 0.26592216356E+03 0.00000000000E+00 0. -1. -740c740 -< 21 -1 0 0 504 503 -0.00000000000E+00 -0.00000000000E+00 -0.20717130236E+04 0.20717130236E+04 0.00000000000E+00 0. -1. ---- -> 21 -1 0 0 505 503 -0.00000000000E+00 -0.00000000000E+00 -0.20717130236E+04 0.20717130236E+04 0.00000000000E+00 0. -1. -743,744c743,744 -< 21 1 1 2 504 505 -0.51917617014E+03 -0.41711433140E+03 -0.10951499650E+04 0.12817494712E+04 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 -0.38623106687E+02 -0.16179945446E+03 -0.19265782928E+03 0.25453456939E+03 0.00000000000E+00 0. -1. ---- -> 21 1 1 2 504 501 -0.51917617014E+03 -0.41711433140E+03 -0.10951499650E+04 0.12817494712E+04 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 -0.38623106687E+02 -0.16179945446E+03 -0.19265782928E+03 0.25453456939E+03 0.00000000000E+00 0. -1. -758,760c758,760 -< -6 1 1 2 0 504 0.99857720003E+02 -0.10433176492E+01 -0.72706788852E+03 0.75400886419E+03 0.17300000000E+03 0. -1. -< 21 1 1 2 504 503 -0.44329997817E+03 0.91326222484E+02 -0.65904825990E+03 0.79949981766E+03 0.00000000000E+00 0. 1. -< 21 1 1 2 505 501 0.38402699303E+02 -0.12844982940E+03 -0.14851741956E+03 0.20007885920E+03 0.00000000000E+00 0. 1. ---- -> -6 1 1 2 0 503 0.99857720003E+02 -0.10433176492E+01 -0.72706788852E+03 0.75400886419E+03 0.17300000000E+03 0. -1. -> 21 1 1 2 504 501 -0.44329997817E+03 0.91326222484E+02 -0.65904825990E+03 0.79949981766E+03 0.00000000000E+00 0. 1. -> 21 1 1 2 505 504 0.38402699303E+02 -0.12844982940E+03 -0.14851741956E+03 0.20007885920E+03 0.00000000000E+00 0. 1. -774,776c774,776 -< -6 1 1 2 0 504 0.16855730097E+03 0.29610135927E+00 -0.32785897876E+03 0.40722495177E+03 0.17300000000E+03 0. 1. -< 21 1 1 2 504 502 -0.42203367704E+02 -0.12545475516E+03 -0.26604125354E+02 0.13501036747E+03 0.00000000000E+00 0. -1. -< 21 1 1 2 505 501 0.10399608402E+03 -0.15451437807E+02 -0.65273222246E+02 0.12375187256E+03 0.00000000000E+00 0. -1. ---- -> -6 1 1 2 0 502 0.16855730097E+03 0.29610135927E+00 -0.32785897876E+03 0.40722495177E+03 0.17300000000E+03 0. 1. -> 21 1 1 2 504 501 -0.42203367704E+02 -0.12545475516E+03 -0.26604125354E+02 0.13501036747E+03 0.00000000000E+00 0. -1. -> 21 1 1 2 505 504 0.10399608402E+03 -0.15451437807E+02 -0.65273222246E+02 0.12375187256E+03 0.00000000000E+00 0. -1. -ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803949752893939E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 61.0438s + [COUNTERS] Fortran Overhead ( 0 ) : 7.1209s + [COUNTERS] CudaCpp MEs ( 2 ) : 53.9229s for 90112 events => throughput is 1.67E+03 events/s + +*** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803949752893939E-004) differ by less than 2E-4 (4.080229265568391e-09) + +*** (2-none) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.731406e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.729577e+03 ) sec^-1 + +*** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277246053730743E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 5.1852s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7589s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4263s for 8192 events => throughput is 3.38E+03 events/s + +*** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277246053730743E-004) differ by less than 2E-4 (3.291708461361509e-09) + +*** (2-sse4) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803949756035068E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 31.4958s + [COUNTERS] Fortran Overhead ( 0 ) : 4.7561s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.7397s for 90112 events => throughput is 3.37E+03 events/s + +*** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803949756035068E-004) differ by less than 2E-4 (4.278985166195071e-09) + +*** (2-sse4) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.465353e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.461263e+03 ) sec^-1 + +*** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277246042502181E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 2.7964s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5815s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2149s for 8192 events => throughput is 6.74E+03 events/s + +*** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277246042502181E-004) differ by less than 2E-4 (2.982187607969422e-09) + +*** (2-avx2) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803949750534138E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 16.9375s + [COUNTERS] Fortran Overhead ( 0 ) : 3.5604s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.3771s for 90112 events => throughput is 6.74E+03 events/s + +*** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803949750534138E-004) differ by less than 2E-4 (3.93091204031748e-09) + +*** (2-avx2) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.903924e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.903662e+03 ) sec^-1 + +*** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277246042502181E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 2.5580s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4673s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0908s for 8192 events => throughput is 7.51E+03 events/s + +*** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277246042502181E-004) differ by less than 2E-4 (2.982187607969422e-09) + +*** (2-512y) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803949750534138E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 15.4404s + [COUNTERS] Fortran Overhead ( 0 ) : 3.4313s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.0091s for 90112 events => throughput is 7.50E+03 events/s + +*** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803949750534138E-004) differ by less than 2E-4 (3.93091204031748e-09) + +*** (2-512y) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.773967e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.770322e+03 ) sec^-1 + +*** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277246042502181E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 2.9791s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6795s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2996s for 8192 events => throughput is 6.30E+03 events/s + +*** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277246042502181E-004) differ by less than 2E-4 (2.982187607969422e-09) + +*** (2-512z) Compare CMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE CMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803949750534138E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 17.9605s + [COUNTERS] Fortran Overhead ( 0 ) : 3.6600s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.3005s for 90112 events => throughput is 6.30E+03 events/s + +*** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803949750534138E-004) differ by less than 2E-4 (3.93091204031748e-09) + +*** (2-512z) Compare CMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.385868e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.369956e+03 ) sec^-1 + +*** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_m_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277245916352716E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 49 events (found 738 events) + [COUNTERS] PROGRAM TOTAL : 0.9537s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9203s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0334s for 8192 events => throughput is 2.45E+05 events/s + +*** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** + +OK! xsec from fortran (3.6277245934316629E-004) and cpp (3.6277245916352716E-004) differ by less than 2E-4 (4.95184004911664e-10) + +*** (3) Compare GMADEVENT_CUDACPP x1 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE GMADEVENT_CUDACPP x10 (create events.lhe) *** +-------------------- ++1 ! Fortran bridge mode (CppOnly=1, FortranOnly=0, BothQuiet=-1, BothDebug=-2) +8192 ! Number of events in a single C++ or CUDA iteration (VECSIZE_USED) +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_m_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803949678703906E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 204 events (found 1633 events) + [COUNTERS] PROGRAM TOTAL : 3.2266s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8741s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3525s for 90112 events => throughput is 2.56E+05 events/s + +*** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** + +OK! xsec from fortran (1.5803949688410202E-004) and cpp (1.5803949678703906E-004) differ by less than 2E-4 (6.141689379290938e-10) + +*** (3) Compare GMADEVENT_CUDACPP x10 events.lhe to MADEVENT events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.259618e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.486305e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.101298e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.156056e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.101489e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.158927e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.110555e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.441830e+05 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index fdf863d237..80c7ff610d 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -10,10 +10,9 @@ make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -23,7 +22,8 @@ make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -31,7 +31,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2022-12-17_11:30:01 +DATE: 2022-12-18_02:49:18 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -55,9 +55,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/ [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 111.1725s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5593s - [COUNTERS] Fortran MEs ( 1 ) : 110.6132s for 8192 events => throughput is 7.41E+01 events/s + [COUNTERS] PROGRAM TOTAL : 111.0008s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5546s + [COUNTERS] Fortran MEs ( 1 ) : 110.4462s for 8192 events => throughput is 7.42E+01 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693039943146880E-006] fbridge_mode=0 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 111.2912s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6057s - [COUNTERS] Fortran MEs ( 1 ) : 110.6854s for 8192 events => throughput is 7.40E+01 events/s + [COUNTERS] PROGRAM TOTAL : 111.0178s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6056s + [COUNTERS] Fortran MEs ( 1 ) : 110.4122s for 8192 events => throughput is 7.42E+01 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448797976056E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 1222.0723s - [COUNTERS] Fortran Overhead ( 0 ) : 5.0236s - [COUNTERS] Fortran MEs ( 1 ) : 1217.0487s for 90112 events => throughput is 7.40E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1220.9983s + [COUNTERS] Fortran Overhead ( 0 ) : 5.0153s + [COUNTERS] Fortran MEs ( 1 ) : 1215.9830s for 90112 events => throughput is 7.41E+01 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,9 +127,9 @@ Executing ' ./build.none_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693039943146895E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 247.4445s - [COUNTERS] Fortran Overhead ( 0 ) : 113.1318s - [COUNTERS] CudaCpp MEs ( 2 ) : 134.3127s for 8192 events => throughput is 6.10E+01 events/s + [COUNTERS] PROGRAM TOTAL : 246.9866s + [COUNTERS] Fortran Overhead ( 0 ) : 112.8981s + [COUNTERS] CudaCpp MEs ( 2 ) : 134.0885s for 8192 events => throughput is 6.11E+01 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -159,9 +159,9 @@ Executing ' ./build.none_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448797976074E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 1595.7502s - [COUNTERS] Fortran Overhead ( 0 ) : 117.3666s - [COUNTERS] CudaCpp MEs ( 2 ) : 1478.3837s for 90112 events => throughput is 6.10E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1589.8020s + [COUNTERS] Fortran Overhead ( 0 ) : 117.3932s + [COUNTERS] CudaCpp MEs ( 2 ) : 1472.4088s for 90112 events => throughput is 6.12E+01 events/s *** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -174,12 +174,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.320497e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.307137e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.317505e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.299353e+01 ) sec^-1 *** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -201,9 +201,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693039943146904E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 126.1293s - [COUNTERS] Fortran Overhead ( 0 ) : 58.2410s - [COUNTERS] CudaCpp MEs ( 2 ) : 67.8883s for 8192 events => throughput is 1.21E+02 events/s + [COUNTERS] PROGRAM TOTAL : 129.0153s + [COUNTERS] Fortran Overhead ( 0 ) : 59.0686s + [COUNTERS] CudaCpp MEs ( 2 ) : 69.9467s for 8192 events => throughput is 1.17E+02 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -233,9 +233,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448797976079E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 806.5309s - [COUNTERS] Fortran Overhead ( 0 ) : 62.4083s - [COUNTERS] CudaCpp MEs ( 2 ) : 744.1226s for 90112 events => throughput is 1.21E+02 events/s + [COUNTERS] PROGRAM TOTAL : 928.6810s + [COUNTERS] Fortran Overhead ( 0 ) : 75.9236s + [COUNTERS] CudaCpp MEs ( 2 ) : 852.7574s for 90112 events => throughput is 1.06E+02 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -248,12 +248,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.426194e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.401863e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.432360e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.402704e+02 ) sec^-1 *** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -275,9 +275,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693039943146895E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 64.6831s - [COUNTERS] Fortran Overhead ( 0 ) : 29.9037s - [COUNTERS] CudaCpp MEs ( 2 ) : 34.7795s for 8192 events => throughput is 2.36E+02 events/s + [COUNTERS] PROGRAM TOTAL : 65.0199s + [COUNTERS] Fortran Overhead ( 0 ) : 29.8625s + [COUNTERS] CudaCpp MEs ( 2 ) : 35.1575s for 8192 events => throughput is 2.33E+02 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -307,9 +307,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448797976077E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 416.7125s - [COUNTERS] Fortran Overhead ( 0 ) : 34.3175s - [COUNTERS] CudaCpp MEs ( 2 ) : 382.3950s for 90112 events => throughput is 2.36E+02 events/s + [COUNTERS] PROGRAM TOTAL : 420.8167s + [COUNTERS] Fortran Overhead ( 0 ) : 34.2824s + [COUNTERS] CudaCpp MEs ( 2 ) : 386.5343s for 90112 events => throughput is 2.33E+02 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -322,12 +322,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.802059e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.811366e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.800831e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.802408e+02 ) sec^-1 *** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -349,9 +349,9 @@ Executing ' ./build.512y_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693039943146895E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 58.5130s - [COUNTERS] Fortran Overhead ( 0 ) : 26.7485s - [COUNTERS] CudaCpp MEs ( 2 ) : 31.7645s for 8192 events => throughput is 2.58E+02 events/s + [COUNTERS] PROGRAM TOTAL : 58.4375s + [COUNTERS] Fortran Overhead ( 0 ) : 26.6723s + [COUNTERS] CudaCpp MEs ( 2 ) : 31.7652s for 8192 events => throughput is 2.58E+02 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -381,9 +381,9 @@ Executing ' ./build.512y_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448797976077E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 379.7518s - [COUNTERS] Fortran Overhead ( 0 ) : 31.0649s - [COUNTERS] CudaCpp MEs ( 2 ) : 348.6868s for 90112 events => throughput is 2.58E+02 events/s + [COUNTERS] PROGRAM TOTAL : 381.4201s + [COUNTERS] Fortran Overhead ( 0 ) : 31.1166s + [COUNTERS] CudaCpp MEs ( 2 ) : 350.3036s for 90112 events => throughput is 2.57E+02 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -396,12 +396,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.138594e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.127858e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.139414e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.139665e+02 ) sec^-1 *** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -423,9 +423,9 @@ Executing ' ./build.512z_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693039943146895E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 55.5339s - [COUNTERS] Fortran Overhead ( 0 ) : 27.0168s - [COUNTERS] CudaCpp MEs ( 2 ) : 28.5171s for 8192 events => throughput is 2.87E+02 events/s + [COUNTERS] PROGRAM TOTAL : 55.5962s + [COUNTERS] Fortran Overhead ( 0 ) : 27.0664s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.5298s for 8192 events => throughput is 2.87E+02 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -455,9 +455,9 @@ Executing ' ./build.512z_d_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448797976077E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 346.6746s - [COUNTERS] Fortran Overhead ( 0 ) : 31.4664s - [COUNTERS] CudaCpp MEs ( 2 ) : 315.2081s for 90112 events => throughput is 2.86E+02 events/s + [COUNTERS] PROGRAM TOTAL : 345.9141s + [COUNTERS] Fortran Overhead ( 0 ) : 31.4666s + [COUNTERS] CudaCpp MEs ( 2 ) : 314.4475s for 90112 events => throughput is 2.87E+02 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -470,12 +470,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.101153e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.105349e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.105261e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.106151e+02 ) sec^-1 *** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -497,9 +497,9 @@ Executing ' ./build.none_d_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693039943146902E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 4.4928s - [COUNTERS] Fortran Overhead ( 0 ) : 3.3926s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1003s for 8192 events => throughput is 7.45E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5095s + [COUNTERS] Fortran Overhead ( 0 ) : 3.4058s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1037s for 8192 events => throughput is 7.42E+03 events/s *** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -529,9 +529,9 @@ Executing ' ./build.none_d_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448797976074E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 19.8846s - [COUNTERS] Fortran Overhead ( 0 ) : 7.7291s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.1556s for 90112 events => throughput is 7.41E+03 events/s + [COUNTERS] PROGRAM TOTAL : 19.8634s + [COUNTERS] Fortran Overhead ( 0 ) : 7.7218s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.1416s for 90112 events => throughput is 7.42E+03 events/s *** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -544,41 +544,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.375535e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.372528e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.842731e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.815995e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.292044e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.220799e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.587544e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.592757e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.323527e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.287761e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.487938e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.442512e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.263988e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.243769e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.230914e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.237359e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 8105b5fe60..67cd632e2e 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -4,26 +4,26 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -31,7 +31,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2022-12-17_13:11:55 +DATE: 2022-12-18_04:33:16 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -55,9 +55,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/ [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 111.2381s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5288s - [COUNTERS] Fortran MEs ( 1 ) : 110.7092s for 8192 events => throughput is 7.40E+01 events/s + [COUNTERS] PROGRAM TOTAL : 110.9526s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5340s + [COUNTERS] Fortran MEs ( 1 ) : 110.4186s for 8192 events => throughput is 7.42E+01 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693039943146880E-006] fbridge_mode=0 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 111.1892s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6036s - [COUNTERS] Fortran MEs ( 1 ) : 110.5856s for 8192 events => throughput is 7.41E+01 events/s + [COUNTERS] PROGRAM TOTAL : 111.0072s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6051s + [COUNTERS] Fortran MEs ( 1 ) : 110.4021s for 8192 events => throughput is 7.42E+01 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448797976056E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 1226.4857s - [COUNTERS] Fortran Overhead ( 0 ) : 5.0068s - [COUNTERS] Fortran MEs ( 1 ) : 1221.4789s for 90112 events => throughput is 7.38E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1220.5138s + [COUNTERS] Fortran Overhead ( 0 ) : 5.0206s + [COUNTERS] Fortran MEs ( 1 ) : 1215.4932s for 90112 events => throughput is 7.41E+01 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,9 +127,9 @@ Executing ' ./build.none_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694707410562672E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 235.3416s - [COUNTERS] Fortran Overhead ( 0 ) : 107.8791s - [COUNTERS] CudaCpp MEs ( 2 ) : 127.4624s for 8192 events => throughput is 6.43E+01 events/s + [COUNTERS] PROGRAM TOTAL : 232.2129s + [COUNTERS] Fortran Overhead ( 0 ) : 107.1695s + [COUNTERS] CudaCpp MEs ( 2 ) : 125.0433s for 8192 events => throughput is 6.55E+01 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -159,9 +159,9 @@ Executing ' ./build.none_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361448795296743E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 1508.9484s - [COUNTERS] Fortran Overhead ( 0 ) : 112.0287s - [COUNTERS] CudaCpp MEs ( 2 ) : 1396.9197s for 90112 events => throughput is 6.45E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1486.4542s + [COUNTERS] Fortran Overhead ( 0 ) : 111.4932s + [COUNTERS] CudaCpp MEs ( 2 ) : 1374.9611s for 90112 events => throughput is 6.55E+01 events/s *** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -174,12 +174,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.653591e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.692140e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.655001e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.698838e+01 ) sec^-1 *** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -201,9 +201,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694705650433302E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 56.9840s - [COUNTERS] Fortran Overhead ( 0 ) : 26.6488s - [COUNTERS] CudaCpp MEs ( 2 ) : 30.3352s for 8192 events => throughput is 2.70E+02 events/s + [COUNTERS] PROGRAM TOTAL : 56.4186s + [COUNTERS] Fortran Overhead ( 0 ) : 26.6090s + [COUNTERS] CudaCpp MEs ( 2 ) : 29.8096s for 8192 events => throughput is 2.75E+02 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -233,9 +233,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361448265670454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 366.2032s - [COUNTERS] Fortran Overhead ( 0 ) : 31.0347s - [COUNTERS] CudaCpp MEs ( 2 ) : 335.1686s for 90112 events => throughput is 2.69E+02 events/s + [COUNTERS] PROGRAM TOTAL : 358.7751s + [COUNTERS] Fortran Overhead ( 0 ) : 30.9752s + [COUNTERS] CudaCpp MEs ( 2 ) : 327.7999s for 90112 events => throughput is 2.75E+02 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -248,12 +248,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.149591e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.155208e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.153731e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.154859e+02 ) sec^-1 *** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -275,9 +275,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694704380988406E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 33.1285s - [COUNTERS] Fortran Overhead ( 0 ) : 15.4517s - [COUNTERS] CudaCpp MEs ( 2 ) : 17.6769s for 8192 events => throughput is 4.63E+02 events/s + [COUNTERS] PROGRAM TOTAL : 33.0822s + [COUNTERS] Fortran Overhead ( 0 ) : 15.4026s + [COUNTERS] CudaCpp MEs ( 2 ) : 17.6797s for 8192 events => throughput is 4.63E+02 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -307,9 +307,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361448599495490E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 213.6590s - [COUNTERS] Fortran Overhead ( 0 ) : 19.8548s - [COUNTERS] CudaCpp MEs ( 2 ) : 193.8042s for 90112 events => throughput is 4.65E+02 events/s + [COUNTERS] PROGRAM TOTAL : 214.1528s + [COUNTERS] Fortran Overhead ( 0 ) : 19.7828s + [COUNTERS] CudaCpp MEs ( 2 ) : 194.3699s for 90112 events => throughput is 4.64E+02 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -322,12 +322,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.532887e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.537523e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.526568e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.546246e+02 ) sec^-1 *** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -349,9 +349,9 @@ Executing ' ./build.512y_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694704380988406E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 29.6145s - [COUNTERS] Fortran Overhead ( 0 ) : 13.6838s - [COUNTERS] CudaCpp MEs ( 2 ) : 15.9308s for 8192 events => throughput is 5.14E+02 events/s + [COUNTERS] PROGRAM TOTAL : 29.4229s + [COUNTERS] Fortran Overhead ( 0 ) : 13.6393s + [COUNTERS] CudaCpp MEs ( 2 ) : 15.7836s for 8192 events => throughput is 5.19E+02 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -381,9 +381,9 @@ Executing ' ./build.512y_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361448599495490E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 194.0885s - [COUNTERS] Fortran Overhead ( 0 ) : 18.0226s - [COUNTERS] CudaCpp MEs ( 2 ) : 176.0659s for 90112 events => throughput is 5.12E+02 events/s + [COUNTERS] PROGRAM TOTAL : 190.8612s + [COUNTERS] Fortran Overhead ( 0 ) : 18.0278s + [COUNTERS] CudaCpp MEs ( 2 ) : 172.8333s for 90112 events => throughput is 5.21E+02 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -396,12 +396,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.256380e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.300457e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.289137e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.304011e+02 ) sec^-1 *** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -423,9 +423,9 @@ Executing ' ./build.512z_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694706911026384E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 27.9958s - [COUNTERS] Fortran Overhead ( 0 ) : 13.7825s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.2133s for 8192 events => throughput is 5.76E+02 events/s + [COUNTERS] PROGRAM TOTAL : 28.0271s + [COUNTERS] Fortran Overhead ( 0 ) : 13.8192s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.2079s for 8192 events => throughput is 5.77E+02 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -455,9 +455,9 @@ Executing ' ./build.512z_f_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361454477781319E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 175.2320s - [COUNTERS] Fortran Overhead ( 0 ) : 18.1679s - [COUNTERS] CudaCpp MEs ( 2 ) : 157.0641s for 90112 events => throughput is 5.74E+02 events/s + [COUNTERS] PROGRAM TOTAL : 175.1886s + [COUNTERS] Fortran Overhead ( 0 ) : 18.2057s + [COUNTERS] CudaCpp MEs ( 2 ) : 156.9829s for 90112 events => throughput is 5.74E+02 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -470,12 +470,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.209233e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.234966e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.222132e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.227932e+02 ) sec^-1 *** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -497,9 +497,9 @@ Executing ' ./build.none_f_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694709717502821E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 2.6687s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1832s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4856s for 8192 events => throughput is 1.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6901s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1990s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4911s for 8192 events => throughput is 1.67E+04 events/s *** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -529,9 +529,9 @@ Executing ' ./build.none_f_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361456124600774E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 11.9752s - [COUNTERS] Fortran Overhead ( 0 ) : 6.5187s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4565s for 90112 events => throughput is 1.65E+04 events/s + [COUNTERS] PROGRAM TOTAL : 11.9462s + [COUNTERS] Fortran Overhead ( 0 ) : 6.5308s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4155s for 90112 events => throughput is 1.66E+04 events/s *** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -544,41 +544,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.627777e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.625716e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.628122e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.637210e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.313575e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.323632e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.364517e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.407477e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.344175e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.308526e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.354818e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.354352e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.300396e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.313107e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.444031e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.457902e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 82930ad98a..c983b94762 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -10,19 +10,19 @@ make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for `all'. +make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -31,7 +31,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2022-12-17_14:29:55 +DATE: 2022-12-18_05:50:32 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -55,9 +55,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/ [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [] fbridge_mode=0 - [COUNTERS] PROGRAM TOTAL : 111.1855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5314s - [COUNTERS] Fortran MEs ( 1 ) : 110.6541s for 8192 events => throughput is 7.40E+01 events/s + [COUNTERS] PROGRAM TOTAL : 111.0629s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5319s + [COUNTERS] Fortran MEs ( 1 ) : 110.5310s for 8192 events => throughput is 7.41E+01 events/s *** (1) EXECUTE MADEVENT x1 (create events.lhe) *** -------------------- @@ -79,9 +79,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693039943146880E-006] fbridge_mode=0 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 111.2506s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6043s - [COUNTERS] Fortran MEs ( 1 ) : 110.6463s for 8192 events => throughput is 7.40E+01 events/s + [COUNTERS] PROGRAM TOTAL : 111.1181s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6063s + [COUNTERS] Fortran MEs ( 1 ) : 110.5118s for 8192 events => throughput is 7.41E+01 events/s *** (1) EXECUTE MADEVENT x10 (create events.lhe) *** -------------------- @@ -103,9 +103,9 @@ Executing ' ./madevent < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448797976056E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 1223.7550s - [COUNTERS] Fortran Overhead ( 0 ) : 5.0217s - [COUNTERS] Fortran MEs ( 1 ) : 1218.7333s for 90112 events => throughput is 7.39E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1221.7964s + [COUNTERS] Fortran Overhead ( 0 ) : 5.0266s + [COUNTERS] Fortran MEs ( 1 ) : 1216.7698s for 90112 events => throughput is 7.41E+01 events/s *** (2-none) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -127,9 +127,9 @@ Executing ' ./build.none_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693040014636590E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 247.4597s - [COUNTERS] Fortran Overhead ( 0 ) : 113.9057s - [COUNTERS] CudaCpp MEs ( 2 ) : 133.5541s for 8192 events => throughput is 6.13E+01 events/s + [COUNTERS] PROGRAM TOTAL : 252.5351s + [COUNTERS] Fortran Overhead ( 0 ) : 114.5393s + [COUNTERS] CudaCpp MEs ( 2 ) : 137.9958s for 8192 events => throughput is 5.94E+01 events/s *** (2-none) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -159,9 +159,9 @@ Executing ' ./build.none_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448915086503E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 1585.7043s - [COUNTERS] Fortran Overhead ( 0 ) : 118.2394s - [COUNTERS] CudaCpp MEs ( 2 ) : 1467.4650s for 90112 events => throughput is 6.14E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1640.4967s + [COUNTERS] Fortran Overhead ( 0 ) : 119.1489s + [COUNTERS] CudaCpp MEs ( 2 ) : 1521.3478s for 90112 events => throughput is 5.92E+01 events/s *** (2-none) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -174,12 +174,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.256456e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.196086e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.251503e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.204593e+01 ) sec^-1 *** (2-sse4) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -201,9 +201,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693040018604290E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 120.7640s - [COUNTERS] Fortran Overhead ( 0 ) : 55.4155s - [COUNTERS] CudaCpp MEs ( 2 ) : 65.3484s for 8192 events => throughput is 1.25E+02 events/s + [COUNTERS] PROGRAM TOTAL : 121.6343s + [COUNTERS] Fortran Overhead ( 0 ) : 55.3773s + [COUNTERS] CudaCpp MEs ( 2 ) : 66.2570s for 8192 events => throughput is 1.24E+02 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -233,9 +233,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448923322559E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 779.1134s - [COUNTERS] Fortran Overhead ( 0 ) : 59.7806s - [COUNTERS] CudaCpp MEs ( 2 ) : 719.3328s for 90112 events => throughput is 1.25E+02 events/s + [COUNTERS] PROGRAM TOTAL : 789.2572s + [COUNTERS] Fortran Overhead ( 0 ) : 59.7335s + [COUNTERS] CudaCpp MEs ( 2 ) : 729.5237s for 90112 events => throughput is 1.24E+02 events/s *** (2-sse4) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -248,12 +248,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.493944e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.498022e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.498454e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.497610e+02 ) sec^-1 *** (2-avx2) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -275,9 +275,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693040019581793E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 62.5347s - [COUNTERS] Fortran Overhead ( 0 ) : 28.7513s - [COUNTERS] CudaCpp MEs ( 2 ) : 33.7834s for 8192 events => throughput is 2.42E+02 events/s + [COUNTERS] PROGRAM TOTAL : 62.8306s + [COUNTERS] Fortran Overhead ( 0 ) : 28.6497s + [COUNTERS] CudaCpp MEs ( 2 ) : 34.1810s for 8192 events => throughput is 2.40E+02 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -307,9 +307,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448920691754E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 401.5913s - [COUNTERS] Fortran Overhead ( 0 ) : 33.1305s - [COUNTERS] CudaCpp MEs ( 2 ) : 368.4608s for 90112 events => throughput is 2.45E+02 events/s + [COUNTERS] PROGRAM TOTAL : 407.8940s + [COUNTERS] Fortran Overhead ( 0 ) : 33.0639s + [COUNTERS] CudaCpp MEs ( 2 ) : 374.8301s for 90112 events => throughput is 2.40E+02 events/s *** (2-avx2) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -322,12 +322,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.916645e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.920053e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.916155e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.920394e+02 ) sec^-1 *** (2-512y) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -349,9 +349,9 @@ Executing ' ./build.512y_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693040019581793E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 55.6959s - [COUNTERS] Fortran Overhead ( 0 ) : 25.2919s - [COUNTERS] CudaCpp MEs ( 2 ) : 30.4040s for 8192 events => throughput is 2.69E+02 events/s + [COUNTERS] PROGRAM TOTAL : 55.8668s + [COUNTERS] Fortran Overhead ( 0 ) : 25.3509s + [COUNTERS] CudaCpp MEs ( 2 ) : 30.5159s for 8192 events => throughput is 2.68E+02 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -381,9 +381,9 @@ Executing ' ./build.512y_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448920691754E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 363.4095s - [COUNTERS] Fortran Overhead ( 0 ) : 29.6822s - [COUNTERS] CudaCpp MEs ( 2 ) : 333.7273s for 90112 events => throughput is 2.70E+02 events/s + [COUNTERS] PROGRAM TOTAL : 364.7491s + [COUNTERS] Fortran Overhead ( 0 ) : 29.6400s + [COUNTERS] CudaCpp MEs ( 2 ) : 335.1091s for 90112 events => throughput is 2.69E+02 events/s *** (2-512y) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -396,12 +396,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.324790e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.318931e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.323876e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.319351e+02 ) sec^-1 *** (2-512z) EXECUTE CMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -423,9 +423,9 @@ Executing ' ./build.512z_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693040019581793E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 54.9201s - [COUNTERS] Fortran Overhead ( 0 ) : 26.4513s - [COUNTERS] CudaCpp MEs ( 2 ) : 28.4689s for 8192 events => throughput is 2.88E+02 events/s + [COUNTERS] PROGRAM TOTAL : 54.6509s + [COUNTERS] Fortran Overhead ( 0 ) : 26.4517s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.1992s for 8192 events => throughput is 2.91E+02 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -455,9 +455,9 @@ Executing ' ./build.512z_m_inl0_hrd0/cmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448920691754E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 343.7163s - [COUNTERS] Fortran Overhead ( 0 ) : 30.8137s - [COUNTERS] CudaCpp MEs ( 2 ) : 312.9026s for 90112 events => throughput is 2.88E+02 events/s + [COUNTERS] PROGRAM TOTAL : 342.5804s + [COUNTERS] Fortran Overhead ( 0 ) : 30.8302s + [COUNTERS] CudaCpp MEs ( 2 ) : 311.7502s for 90112 events => throughput is 2.89E+02 events/s *** (2-512z) Compare CMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -470,12 +470,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.184197e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.180016e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.180428e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.184831e+02 ) sec^-1 *** (3) EXECUTE GMADEVENT_CUDACPP x1 (create events.lhe) *** -------------------- @@ -497,9 +497,9 @@ Executing ' ./build.none_m_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693039940476802E-006] fbridge_mode=1 [UNWEIGHT] Wrote 14 events (found 457 events) - [COUNTERS] PROGRAM TOTAL : 3.8262s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9473s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8789s for 8192 events => throughput is 9.32E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.8350s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9520s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8830s for 8192 events => throughput is 9.28E+03 events/s *** (3) Compare GMADEVENT_CUDACPP x1 xsec to MADEVENT xsec *** @@ -529,9 +529,9 @@ Executing ' ./build.none_m_inl0_hrd0/gmadevent_cudacpp < /tmp/avalassi/input_ggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358448796664319E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 1181 events) - [COUNTERS] PROGRAM TOTAL : 16.9447s - [COUNTERS] Fortran Overhead ( 0 ) : 7.2752s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.6695s for 90112 events => throughput is 9.32E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.9627s + [COUNTERS] Fortran Overhead ( 0 ) : 7.2781s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.6846s for 90112 events => throughput is 9.30E+03 events/s *** (3) Compare GMADEVENT_CUDACPP x10 xsec to MADEVENT xsec *** @@ -544,41 +544,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.232608e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.208325e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.077021e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.064512e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.104387e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103749e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.151924e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.153152e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108056e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.104387e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.112919e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.105354e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.101484e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.107683e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.625854e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.627530e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index be06ca150d..c8efee722f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_04:13:02 +DATE: 2022-12-18_08:25:10 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.535517e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.159528e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.722821e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.542585e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.015904e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.831455e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.027292 sec - 471,359,919 cycles:u # 0.458 GHz - 806,095,810 instructions:u # 1.71 insn per cycle - 1.507308389 seconds time elapsed +TOTAL : 0.755650 sec + 468,579,154 cycles:u # 0.473 GHz + 809,589,748 instructions:u # 1.73 insn per cycle + 1.112841852 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 148 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,14 +66,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.271626e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.299344e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.299344e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.248174e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.292975e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.292975e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 7.284241 sec - 19,282,455,241 cycles:u # 2.641 GHz - 42,965,017,429 instructions:u # 2.23 insn per cycle - 7.555554229 seconds time elapsed +TOTAL : 7.303382 sec + 19,341,935,467 cycles:u # 2.642 GHz + 42,965,017,260 instructions:u # 2.22 insn per cycle + 7.350387114 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 362) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_d_inl0_hrd0/runTest.exe @@ -91,14 +91,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.490052e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.819962e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.819962e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.483867e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.821019e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.821019e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.717002 sec - 12,429,056,804 cycles:u # 2.626 GHz - 27,349,468,889 instructions:u # 2.20 insn per cycle - 4.842404733 seconds time elapsed +TOTAL : 4.740640 sec + 12,479,822,002 cycles:u # 2.626 GHz + 27,349,470,345 instructions:u # 2.19 insn per cycle + 4.776880584 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,14 +116,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.947324e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.939320e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.939320e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.937918e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.938666e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.938666e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.716934 sec - 9,252,033,814 cycles:u # 2.479 GHz - 16,334,068,187 instructions:u # 1.77 insn per cycle - 3.783164369 seconds time elapsed +TOTAL : 3.751869 sec + 9,307,395,922 cycles:u # 2.479 GHz + 16,334,069,525 instructions:u # 1.75 insn per cycle + 3.790887207 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1046) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_d_inl0_hrd0/runTest.exe @@ -141,14 +141,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.987289e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.212046e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.212046e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.990092e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.253769e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.253769e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.652304 sec - 9,103,884,093 cycles:u # 2.482 GHz - 15,962,872,487 instructions:u # 1.75 insn per cycle - 3.752595567 seconds time elapsed +TOTAL : 3.662366 sec + 9,109,409,090 cycles:u # 2.483 GHz + 15,962,872,836 instructions:u # 1.75 insn per cycle + 3.715378969 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1001) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_d_inl0_hrd0/runTest.exe @@ -166,14 +166,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.930997e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.832456e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.832456e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.921632e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.786086e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.786086e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.749659 sec - 8,421,322,282 cycles:u # 2.237 GHz - 12,773,070,331 instructions:u # 1.52 insn per cycle - 3.865634440 seconds time elapsed +TOTAL : 3.786244 sec + 8,460,862,856 cycles:u # 2.235 GHz + 12,773,070,715 instructions:u # 1.51 insn per cycle + 3.825116150 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 266) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 9caade089e..f74a38acd0 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_04:51:03 +DATE: 2022-12-18_09:02:41 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +47,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.579687e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.473373e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.473373e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.562154e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.332593e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.332593e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.390014 sec - 7,340,447,866 cycles:u # 2.012 GHz - 11,742,011,278 instructions:u # 1.60 insn per cycle - 3.705930636 seconds time elapsed +TOTAL : 3.408698 sec + 7,348,723,205 cycles:u # 2.005 GHz + 11,759,458,594 instructions:u # 1.60 insn per cycle + 3.725813147 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 148 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -71,14 +71,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.999193e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.241932e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.241932e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.994121e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.241030e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.241030e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 7.739105 sec - 20,326,597,375 cycles:u # 2.619 GHz - 43,370,678,181 instructions:u # 2.13 insn per cycle - 7.765892080 seconds time elapsed +TOTAL : 7.748524 sec + 20,353,390,532 cycles:u # 2.618 GHz + 43,370,678,896 instructions:u # 2.13 insn per cycle + 7.776587549 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 362) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_d_inl0_hrd0/runTest.exe @@ -97,14 +97,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.411287e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.551591e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.551591e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.411244e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.550241e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.550241e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.218729 sec - 13,560,426,162 cycles:u # 2.590 GHz - 28,213,884,053 instructions:u # 2.08 insn per cycle - 5.245394860 seconds time elapsed +TOTAL : 5.211440 sec + 13,569,330,178 cycles:u # 2.592 GHz + 28,213,884,980 instructions:u # 2.08 insn per cycle + 5.239336730 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_d_inl0_hrd0/runTest.exe @@ -123,14 +123,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.807565e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.162125e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.162125e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.809485e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.149221e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.149221e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.238088 sec - 10,440,172,734 cycles:u # 2.450 GHz - 17,716,217,161 instructions:u # 1.70 insn per cycle - 4.265166886 seconds time elapsed +TOTAL : 4.237645 sec + 10,452,537,891 cycles:u # 2.453 GHz + 17,716,217,606 instructions:u # 1.69 insn per cycle + 4.266231955 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1046) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_d_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.859258e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.382820e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.382820e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.852234e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.368594e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.368594e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.128689 sec - 10,181,770,916 cycles:u # 2.452 GHz - 17,345,021,131 instructions:u # 1.70 insn per cycle - 4.155206123 seconds time elapsed +TOTAL : 4.152946 sec + 10,256,752,366 cycles:u # 2.455 GHz + 17,345,021,840 instructions:u # 1.69 insn per cycle + 4.181108075 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1001) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_d_inl0_hrd0/runTest.exe @@ -175,14 +175,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.804957e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.101295e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.101295e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.800537e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.092863e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.092863e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.240726 sec - 9,561,952,890 cycles:u # 2.242 GHz - 13,940,588,312 instructions:u # 1.46 insn per cycle - 4.267935638 seconds time elapsed +TOTAL : 4.253992 sec + 9,583,657,299 cycles:u # 2.240 GHz + 13,940,588,419 instructions:u # 1.45 insn per cycle + 4.282017435 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 266) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index b46b15871e..2bd1915947 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_05:05:35 +DATE: 2022-12-18_09:17:10 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.218194e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.446653e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.650331e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.769056e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.279039e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.693053e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.563056 sec - 2,227,349,993 cycles:u # 1.235 GHz - 3,656,783,264 instructions:u # 1.64 insn per cycle - 1.861709954 seconds time elapsed +TOTAL : 1.572082 sec + 2,221,841,259 cycles:u # 1.220 GHz + 3,672,604,709 instructions:u # 1.65 insn per cycle + 1.879735441 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 148 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,14 +66,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.311223e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.302665e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.302665e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.322042e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.302554e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.302554e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 7.777766 sec - 20,085,566,734 cycles:u # 2.578 GHz - 43,076,632,186 instructions:u # 2.14 insn per cycle - 7.793679399 seconds time elapsed +TOTAL : 7.780210 sec + 20,095,696,185 cycles:u # 2.579 GHz + 43,076,632,383 instructions:u # 2.14 insn per cycle + 7.796785264 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 362) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_d_inl0_hrd0/runTest.exe @@ -91,14 +91,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.490778e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.818187e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.818187e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.494733e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.826019e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.826019e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.242639 sec - 13,301,232,936 cycles:u # 2.531 GHz - 27,360,421,741 instructions:u # 2.06 insn per cycle - 5.258644731 seconds time elapsed +TOTAL : 5.234148 sec + 13,292,838,956 cycles:u # 2.533 GHz + 27,360,421,518 instructions:u # 2.06 insn per cycle + 5.250688322 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,14 +116,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.941486e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.930721e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.930721e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.941645e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.904627e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.904627e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.251583 sec - 10,161,931,090 cycles:u # 2.383 GHz - 16,244,338,915 instructions:u # 1.60 insn per cycle - 4.267498262 seconds time elapsed +TOTAL : 4.259538 sec + 10,177,268,183 cycles:u # 2.382 GHz + 16,244,338,675 instructions:u # 1.60 insn per cycle + 4.276452411 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1046) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_d_inl0_hrd0/runTest.exe @@ -141,14 +141,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.997790e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.235780e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.235780e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.997889e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.243991e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.243991e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.167501 sec - 9,970,922,302 cycles:u # 2.385 GHz - 15,671,815,947 instructions:u # 1.57 insn per cycle - 4.183531473 seconds time elapsed +TOTAL : 4.175246 sec + 9,983,304,455 cycles:u # 2.384 GHz + 15,671,815,940 instructions:u # 1.57 insn per cycle + 4.191808043 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1001) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_d_inl0_hrd0/runTest.exe @@ -166,14 +166,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.937866e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.861201e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.861201e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.948240e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.904491e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.904491e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.267171 sec - 9,309,890,183 cycles:u # 2.175 GHz - 12,482,013,612 instructions:u # 1.34 insn per cycle - 4.283218917 seconds time elapsed +TOTAL : 4.269205 sec + 9,291,640,906 cycles:u # 2.170 GHz + 12,482,013,908 instructions:u # 1.34 insn per cycle + 4.285983292 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 266) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 57ab2249e1..5e546fb792 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_05:02:01 +DATE: 2022-12-18_09:13:37 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.909176e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.324987e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.731071e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.804459e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.297131e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.714861e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.028168 sec - 1,279,994,135 cycles:u # 1.009 GHz - 3,561,401,114 instructions:u # 2.78 insn per cycle - 1.326922190 seconds time elapsed +TOTAL : 1.029991 sec + 1,289,337,996 cycles:u # 1.004 GHz + 3,565,384,004 instructions:u # 2.77 insn per cycle + 1.341800672 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 148 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,14 +66,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.308092e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.301631e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.301631e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.316449e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.301553e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.301553e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 7.249144 sec - 19,195,240,863 cycles:u # 2.643 GHz - 42,965,018,150 instructions:u # 2.24 insn per cycle - 7.265550906 seconds time elapsed +TOTAL : 7.243497 sec + 19,201,637,569 cycles:u # 2.646 GHz + 42,965,018,180 instructions:u # 2.24 insn per cycle + 7.260320977 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 362) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_d_inl0_hrd0/runTest.exe @@ -91,14 +91,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.458500e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.704960e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.704960e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.491550e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.818428e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.818428e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.803787 sec - 12,650,013,915 cycles:u # 2.628 GHz - 27,349,470,607 instructions:u # 2.16 insn per cycle - 4.820452059 seconds time elapsed +TOTAL : 4.710041 sec + 12,414,695,369 cycles:u # 2.629 GHz + 27,349,470,585 instructions:u # 2.20 insn per cycle + 4.726573623 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,14 +116,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.940029e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.887937e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.887937e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.937906e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.906755e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.906755e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.724483 sec - 9,264,974,447 cycles:u # 2.479 GHz - 16,334,070,012 instructions:u # 1.76 insn per cycle - 3.741062900 seconds time elapsed +TOTAL : 3.728991 sec + 9,289,526,105 cycles:u # 2.482 GHz + 16,334,069,993 instructions:u # 1.76 insn per cycle + 3.745734743 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1046) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_d_inl0_hrd0/runTest.exe @@ -141,14 +141,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.994322e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.218808e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.218808e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.002687e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.248044e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.248044e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.634746 sec - 9,060,181,266 cycles:u # 2.484 GHz - 15,962,873,608 instructions:u # 1.76 insn per cycle - 3.651317214 seconds time elapsed +TOTAL : 3.622950 sec + 9,041,277,198 cycles:u # 2.486 GHz + 15,962,873,440 instructions:u # 1.77 insn per cycle + 3.639697280 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1001) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_d_inl0_hrd0/runTest.exe @@ -166,14 +166,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.948287e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.898493e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.898493e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.930442e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.876463e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.876463e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.713399 sec - 8,354,482,988 cycles:u # 2.241 GHz - 12,773,071,768 instructions:u # 1.53 insn per cycle - 3.729971889 seconds time elapsed +TOTAL : 3.747224 sec + 8,434,291,800 cycles:u # 2.242 GHz + 12,773,071,583 instructions:u # 1.51 insn per cycle + 3.764450105 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 266) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 4fb5d7d209..33ea9d258a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_04:58:23 +DATE: 2022-12-18_09:10:00 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.962751e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.216496e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.523493e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.991735e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.211099e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.531261e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.018965 sec - 6,635,238,065 cycles:u # 2.038 GHz - 10,858,222,106 instructions:u # 1.64 insn per cycle - 3.318008230 seconds time elapsed +TOTAL : 3.010093 sec + 6,605,461,211 cycles:u # 2.033 GHz + 10,862,922,274 instructions:u # 1.64 insn per cycle + 3.311546093 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 148 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -67,14 +67,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.312972e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.301066e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.301066e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.321009e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.302868e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.302868e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 7.245753 sec - 19,181,134,416 cycles:u # 2.643 GHz - 42,965,017,976 instructions:u # 2.24 insn per cycle - 7.262540391 seconds time elapsed +TOTAL : 7.239743 sec + 19,194,413,498 cycles:u # 2.646 GHz + 42,965,018,106 instructions:u # 2.24 insn per cycle + 7.256425164 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 362) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_d_inl0_hrd0/runTest.exe @@ -92,14 +92,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.490268e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.821654e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.821654e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.493569e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.823521e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.823521e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.710551 sec - 12,408,112,363 cycles:u # 2.627 GHz - 27,349,471,041 instructions:u # 2.20 insn per cycle - 4.727002609 seconds time elapsed +TOTAL : 4.702768 sec + 12,405,510,477 cycles:u # 2.630 GHz + 27,349,470,887 instructions:u # 2.20 insn per cycle + 4.719626577 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_d_inl0_hrd0/runTest.exe @@ -117,14 +117,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.946197e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.943586e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.943586e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.948841e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.915829e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.915829e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.713753 sec - 9,242,526,898 cycles:u # 2.479 GHz - 16,334,070,283 instructions:u # 1.77 insn per cycle - 3.730556840 seconds time elapsed +TOTAL : 3.709983 sec + 9,241,009,292 cycles:u # 2.482 GHz + 16,334,070,390 instructions:u # 1.77 insn per cycle + 3.726693343 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1046) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,14 +142,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.998212e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.215448e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.215448e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.001518e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.251527e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.251527e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.628850 sec - 9,045,003,748 cycles:u # 2.483 GHz - 15,962,873,543 instructions:u # 1.76 insn per cycle - 3.645743764 seconds time elapsed +TOTAL : 3.626378 sec + 9,047,885,300 cycles:u # 2.485 GHz + 15,962,873,582 instructions:u # 1.76 insn per cycle + 3.643395559 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1001) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_d_inl0_hrd0/runTest.exe @@ -167,14 +167,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.950441e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.906461e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.906461e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.941736e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.868931e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.868931e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.710834 sec - 8,344,914,877 cycles:u # 2.241 GHz - 12,773,071,714 instructions:u # 1.53 insn per cycle - 3.727183498 seconds time elapsed +TOTAL : 3.728345 sec + 8,383,143,816 cycles:u # 2.240 GHz + 12,773,071,764 instructions:u # 1.52 insn per cycle + 3.745332122 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 266) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 80650ef32c..709316e302 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_04:13:44 +DATE: 2022-12-18_08:25:48 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.603614e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.813985e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.075127e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.376648e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.828142e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.079784e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.702913 sec - 447,127,515 cycles:u # 0.466 GHz - 778,770,967 instructions:u # 1.74 insn per cycle - 1.153665161 seconds time elapsed +TOTAL : 0.705649 sec + 456,069,457 cycles:u # 0.478 GHz + 785,305,676 instructions:u # 1.72 insn per cycle + 1.080317925 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 122 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,14 +66,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.302394e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.303392e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.303392e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.219832e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.302265e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.302265e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 7.256086 sec - 19,200,822,806 cycles:u # 2.641 GHz - 42,913,896,458 instructions:u # 2.24 insn per cycle - 7.293444166 seconds time elapsed +TOTAL : 7.324372 sec + 19,394,727,622 cycles:u # 2.643 GHz + 42,913,897,599 instructions:u # 2.21 insn per cycle + 7.375152469 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 349) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_d_inl0_hrd1/runTest.exe @@ -91,14 +91,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.489691e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.821461e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.821461e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.486676e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.820206e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.820206e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.716006 sec - 12,423,690,148 cycles:u # 2.626 GHz - 27,310,934,353 instructions:u # 2.20 insn per cycle - 4.824173306 seconds time elapsed +TOTAL : 4.730752 sec + 12,473,795,385 cycles:u # 2.628 GHz + 27,310,935,268 instructions:u # 2.19 insn per cycle + 4.770879111 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1258) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_d_inl0_hrd1/runTest.exe @@ -116,14 +116,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.948460e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.940554e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.940554e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.945456e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.956456e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.956456e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.712722 sec - 9,240,380,401 cycles:u # 2.479 GHz - 16,308,115,174 instructions:u # 1.76 insn per cycle - 3.774522909 seconds time elapsed +TOTAL : 3.722970 sec + 9,271,630,944 cycles:u # 2.480 GHz + 16,308,115,978 instructions:u # 1.76 insn per cycle + 3.754375996 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1026) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_d_inl0_hrd1/runTest.exe @@ -141,14 +141,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.981675e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.208076e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.208076e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.982098e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.245044e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.245044e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.660848 sec - 9,120,617,735 cycles:u # 2.481 GHz - 15,949,502,470 instructions:u # 1.75 insn per cycle - 3.734031357 seconds time elapsed +TOTAL : 3.663811 sec + 9,143,756,237 cycles:u # 2.485 GHz + 15,949,502,435 instructions:u # 1.74 insn per cycle + 3.707989879 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 981) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_d_inl0_hrd1/runTest.exe @@ -166,14 +166,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.976643e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.119268e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.119268e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.970103e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.167498e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.167498e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.672716 sec - 8,295,761,040 cycles:u # 2.249 GHz - 12,652,744,082 instructions:u # 1.53 insn per cycle - 3.737076858 seconds time elapsed +TOTAL : 3.688722 sec + 8,352,372,118 cycles:u # 2.254 GHz + 12,652,744,382 instructions:u # 1.51 insn per cycle + 3.727076890 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 244) (512y: 0) (512z: 663) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 710423d124..15d702acbc 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_04:40:14 +DATE: 2022-12-18_08:51:51 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.477266e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.201830e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.784497e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.451902e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.191235e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.792532e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.715075 sec - 449,552,435 cycles:u # 0.468 GHz - 784,286,053 instructions:u # 1.74 insn per cycle - 1.022733860 seconds time elapsed +TOTAL : 0.728353 sec + 452,718,488 cycles:u # 0.464 GHz + 779,527,322 instructions:u # 1.72 insn per cycle + 1.035596415 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 148 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.845892e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.303000e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.303000e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.834707e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.260140e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.260140e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.907377 sec - 10,259,984,871 cycles:u # 2.616 GHz - 19,403,269,134 instructions:u # 1.89 insn per cycle - 3.925736108 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 197) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.926796 sec + 10,321,129,879 cycles:u # 2.618 GHz + 19,478,766,594 instructions:u # 1.89 insn per cycle + 3.945352467 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 200) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -91,15 +91,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.259623e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.080048e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.080048e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.271210e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.141401e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.141401e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.285764 sec - 8,592,769,218 cycles:u # 2.603 GHz - 15,439,622,641 instructions:u # 1.80 insn per cycle - 3.304654556 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 795) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.270727 sec + 8,560,164,515 cycles:u # 2.605 GHz + 15,401,874,028 instructions:u # 1.80 insn per cycle + 3.289210884 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 792) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,14 +116,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.481109e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.084817e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.084817e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.460358e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.062805e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.062805e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.025483 sec - 7,676,575,900 cycles:u # 2.524 GHz - 12,461,632,905 instructions:u # 1.62 insn per cycle - 3.044056233 seconds time elapsed +TOTAL : 3.048972 sec + 7,737,159,746 cycles:u # 2.525 GHz + 12,461,632,971 instructions:u # 1.61 insn per cycle + 3.067439345 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 442) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_d_inl1_hrd0/runTest.exe @@ -141,14 +141,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.474380e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.082919e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.082919e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.474103e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.077276e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.077276e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.034328 sec - 7,695,037,570 cycles:u # 2.523 GHz - 12,257,162,033 instructions:u # 1.59 insn per cycle - 3.052940189 seconds time elapsed +TOTAL : 3.033595 sec + 7,697,478,076 cycles:u # 2.525 GHz + 12,257,161,970 instructions:u # 1.59 insn per cycle + 3.051839284 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 411) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_d_inl1_hrd0/runTest.exe @@ -166,14 +166,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.322924e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.690047e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.690047e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.344454e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.700141e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.700141e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.205063 sec - 7,591,617,226 cycles:u # 2.357 GHz - 11,300,853,028 instructions:u # 1.49 insn per cycle - 3.223971874 seconds time elapsed +TOTAL : 3.180023 sec + 7,533,965,080 cycles:u # 2.358 GHz + 11,300,852,547 instructions:u # 1.50 insn per cycle + 3.198614844 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 197) (512y: 0) (512z: 276) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index e9c99419da..b37efca694 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_04:40:45 +DATE: 2022-12-18_08:52:22 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.574938e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.864241e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.080113e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.535220e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.827691e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.077689e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.704133 sec - 451,352,032 cycles:u # 0.475 GHz - 782,669,993 instructions:u # 1.73 insn per cycle - 1.011574672 seconds time elapsed +TOTAL : 0.931533 sec + 451,845,724 cycles:u # 0.422 GHz + 782,335,605 instructions:u # 1.73 insn per cycle + 1.243270757 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 122 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.244752e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.273530e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.273530e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.247001e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.261034e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.261034e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.302099 sec - 8,623,020,576 cycles:u # 2.600 GHz - 15,703,067,131 instructions:u # 1.82 insn per cycle - 3.320829637 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 165) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.299484 sec + 8,638,683,722 cycles:u # 2.606 GHz + 15,677,900,863 instructions:u # 1.81 insn per cycle + 3.317873979 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 164) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -91,15 +91,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.539718e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.312480e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.312480e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.535423e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.307785e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.307785e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.975458 sec - 7,766,381,271 cycles:u # 2.596 GHz - 13,425,548,828 instructions:u # 1.73 insn per cycle - 2.994397110 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.980579 sec + 7,785,408,055 cycles:u # 2.599 GHz + 13,444,423,447 instructions:u # 1.73 insn per cycle + 2.999143372 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,14 +116,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.561323e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.310100e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.310100e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.578445e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.321467e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.321467e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.947872 sec - 7,502,890,065 cycles:u # 2.532 GHz - 11,771,924,206 instructions:u # 1.57 insn per cycle - 2.966478332 seconds time elapsed +TOTAL : 2.928466 sec + 7,467,443,074 cycles:u # 2.536 GHz + 11,771,923,904 instructions:u # 1.58 insn per cycle + 2.947177649 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 347) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_d_inl1_hrd1/runTest.exe @@ -141,14 +141,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.618684e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.403952e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.403952e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.622818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.404595e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.404595e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.891837 sec - 7,373,027,050 cycles:u # 2.536 GHz - 11,679,127,510 instructions:u # 1.58 insn per cycle - 2.910407458 seconds time elapsed +TOTAL : 2.887275 sec + 7,370,272,871 cycles:u # 2.539 GHz + 11,679,127,696 instructions:u # 1.58 insn per cycle + 2.905794226 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 333) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_d_inl1_hrd1/runTest.exe @@ -166,14 +166,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.421049e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.794015e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.794015e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.419863e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.793837e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.793837e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.092878 sec - 7,384,265,108 cycles:u # 2.376 GHz - 11,013,801,497 instructions:u # 1.49 insn per cycle - 3.111289485 seconds time elapsed +TOTAL : 3.093706 sec + 7,393,898,226 cycles:u # 2.378 GHz + 11,013,801,247 instructions:u # 1.49 insn per cycle + 3.112078031 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 166) (512y: 0) (512z: 227) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index a3be445dc1..2947a01418 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_04:14:24 +DATE: 2022-12-18_08:26:26 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.332038e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.317572e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.770122e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.236713e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.320157e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.777030e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.596114 sec - 331,474,793 cycles:u # 0.395 GHz - 690,979,006 instructions:u # 2.08 insn per cycle - 0.968446754 seconds time elapsed +TOTAL : 0.620549 sec + 337,174,098 cycles:u # 0.403 GHz + 697,659,379 instructions:u # 2.07 insn per cycle + 1.005960947 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,14 +66,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.957413e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.316269e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.316269e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.970238e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.320062e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.320062e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.748318 sec - 17,929,193,947 cycles:u # 2.654 GHz - 41,799,943,984 instructions:u # 2.33 insn per cycle - 6.810088744 seconds time elapsed +TOTAL : 6.741284 sec + 17,936,426,106 cycles:u # 2.656 GHz + 41,749,612,241 instructions:u # 2.33 insn per cycle + 6.838104872 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 355) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_f_inl0_hrd0/runTest.exe @@ -91,14 +91,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.284266e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.474107e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.474107e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.285962e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.515861e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.515861e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270375e-06 ) GeV^0 -TOTAL : 3.182548 sec - 8,410,299,875 cycles:u # 2.634 GHz - 18,527,617,623 instructions:u # 2.20 insn per cycle - 3.227457705 seconds time elapsed +TOTAL : 3.191890 sec + 8,418,678,634 cycles:u # 2.636 GHz + 18,527,617,799 instructions:u # 2.20 insn per cycle + 3.225401008 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,14 +116,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.881738e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.014690e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.014690e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.875113e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.016387e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.016387e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.605198 sec - 6,623,895,416 cycles:u # 2.533 GHz - 12,423,010,790 instructions:u # 1.88 insn per cycle - 2.641525457 seconds time elapsed +TOTAL : 2.627616 sec + 6,653,283,192 cycles:u # 2.533 GHz + 12,423,011,981 instructions:u # 1.87 insn per cycle + 2.658747297 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1123) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_f_inl0_hrd0/runTest.exe @@ -141,14 +141,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.929959e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.079261e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.079261e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.927021e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.081801e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.081801e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.561492 sec - 6,525,402,505 cycles:u # 2.538 GHz - 12,231,123,646 instructions:u # 1.87 insn per cycle - 2.613029785 seconds time elapsed +TOTAL : 2.579700 sec + 6,543,801,545 cycles:u # 2.537 GHz + 12,231,124,870 instructions:u # 1.87 insn per cycle + 2.601335483 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1069) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_f_inl0_hrd0/runTest.exe @@ -166,14 +166,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.889515e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.992181e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.992181e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.864330e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.846482e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.846482e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270340e-06 ) GeV^0 -TOTAL : 2.600589 sec - 6,160,424,866 cycles:u # 2.360 GHz - 10,694,024,865 instructions:u # 1.74 insn per cycle - 2.649090224 seconds time elapsed +TOTAL : 2.661818 sec + 6,213,045,406 cycles:u # 2.357 GHz + 10,694,025,452 instructions:u # 1.72 insn per cycle + 2.694682201 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 387) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 70210bfa3b..ba5e5e1431 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_04:51:46 +DATE: 2022-12-18_09:03:24 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -53,14 +53,14 @@ WARNING! flagging abnormal ME for ievt=247522 Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.626810e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.875941e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.875941e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.510311e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.731453e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.731453e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371709e-02 +- 3.270378e-06 ) GeV^0 -TOTAL : 1.984697 sec - 3,902,727,767 cycles:u # 1.755 GHz - 7,562,784,849 instructions:u # 1.94 insn per cycle - 2.282226134 seconds time elapsed +TOTAL : 2.009474 sec + 3,960,941,890 cycles:u # 1.761 GHz + 7,572,058,725 instructions:u # 1.91 insn per cycle + 2.307430559 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -83,14 +83,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.772502e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.282760e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.282760e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.791661e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.287337e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.287337e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.998340 sec - 18,502,858,472 cycles:u # 2.639 GHz - 42,027,385,032 instructions:u # 2.27 insn per cycle - 7.016466951 seconds time elapsed +TOTAL : 6.989262 sec + 18,499,767,190 cycles:u # 2.641 GHz + 41,977,053,614 instructions:u # 2.27 insn per cycle + 7.007604540 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 355) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_f_inl0_hrd0/runTest.exe @@ -115,14 +115,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.176667e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.888756e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.888756e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.176634e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.899328e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.899328e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270375e-06 ) GeV^0 -TOTAL : 3.467707 sec - 9,080,442,955 cycles:u # 2.607 GHz - 19,882,280,022 instructions:u # 2.19 insn per cycle - 3.486508543 seconds time elapsed +TOTAL : 3.468328 sec + 9,082,983,055 cycles:u # 2.608 GHz + 19,882,279,514 instructions:u # 2.19 insn per cycle + 3.486557717 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_f_inl0_hrd0/runTest.exe @@ -146,14 +146,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.701642e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.287395e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.287395e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.703944e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.292366e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.292366e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.907743 sec - 7,332,460,931 cycles:u # 2.509 GHz - 13,563,042,745 instructions:u # 1.85 insn per cycle - 2.925606383 seconds time elapsed +TOTAL : 2.906784 sec + 7,335,594,742 cycles:u # 2.510 GHz + 13,563,042,862 instructions:u # 1.85 insn per cycle + 2.924926700 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1123) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_f_inl0_hrd0/runTest.exe @@ -177,14 +177,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.738782e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.679666e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.679666e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.739146e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.693087e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.693087e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.866512 sec - 7,239,056,734 cycles:u # 2.512 GHz - 13,371,155,453 instructions:u # 1.85 insn per cycle - 2.884231805 seconds time elapsed +TOTAL : 2.872934 sec + 7,259,881,345 cycles:u # 2.513 GHz + 13,371,155,686 instructions:u # 1.84 insn per cycle + 2.891310952 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1069) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_f_inl0_hrd0/runTest.exe @@ -208,14 +208,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.677355e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.856832e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.856832e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.659346e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.904663e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.904663e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270340e-06 ) GeV^0 -TOTAL : 2.930507 sec - 6,931,747,087 cycles:u # 2.354 GHz - 11,920,891,088 instructions:u # 1.72 insn per cycle - 2.948632587 seconds time elapsed +TOTAL : 2.948167 sec + 6,983,849,929 cycles:u # 2.357 GHz + 11,920,890,860 instructions:u # 1.71 insn per cycle + 2.966660422 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 387) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 7f0c09403c..e36108b985 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_05:06:16 +DATE: 2022-12-18_09:17:51 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.404593e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.334601e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.724631e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.384246e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.338496e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.723018e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.400176 sec - 1,990,824,943 cycles:u # 1.218 GHz - 3,585,238,689 instructions:u # 1.80 insn per cycle - 1.693031845 seconds time elapsed +TOTAL : 1.410233 sec + 2,000,289,878 cycles:u # 1.219 GHz + 3,588,422,890 instructions:u # 1.79 insn per cycle + 1.698896131 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,14 +66,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=3, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.984896e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.319467e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.319467e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.996682e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.321994e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.321994e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270268e-06 ) GeV^0 -TOTAL : 7.228192 sec - 18,705,803,117 cycles:u # 2.585 GHz - 41,952,522,041 instructions:u # 2.24 insn per cycle - 7.240116399 seconds time elapsed +TOTAL : 7.232522 sec + 18,704,153,783 cycles:u # 2.583 GHz + 41,902,190,799 instructions:u # 2.24 insn per cycle + 7.244877788 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 355) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_f_inl0_hrd0/runTest.exe @@ -91,14 +91,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=3, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.261394e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.505440e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.505440e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.293518e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.515553e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.515553e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270268e-06 ) GeV^0 -TOTAL : 3.709765 sec - 9,298,571,932 cycles:u # 2.500 GHz - 18,579,533,003 instructions:u # 2.00 insn per cycle - 3.721910062 seconds time elapsed +TOTAL : 3.671381 sec + 9,201,852,047 cycles:u # 2.501 GHz + 18,579,532,861 instructions:u # 2.02 insn per cycle + 3.683511276 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,14 +116,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=3, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.863001e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.010190e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.010190e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.864964e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.012197e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.012197e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371884e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.122047 sec - 7,460,021,425 cycles:u # 2.383 GHz - 12,305,057,911 instructions:u # 1.65 insn per cycle - 3.134046456 seconds time elapsed +TOTAL : 3.126623 sec + 7,462,449,723 cycles:u # 2.381 GHz + 12,305,057,646 instructions:u # 1.65 insn per cycle + 3.138826442 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1123) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_f_inl0_hrd0/runTest.exe @@ -141,14 +141,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=3, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.912816e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.072675e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.072675e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.910265e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.072103e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.072103e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371884e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.083060 sec - 7,374,738,832 cycles:u # 2.386 GHz - 11,911,844,335 instructions:u # 1.62 insn per cycle - 3.095150444 seconds time elapsed +TOTAL : 3.090893 sec + 7,393,605,387 cycles:u # 2.385 GHz + 11,911,844,100 instructions:u # 1.61 insn per cycle + 3.103021048 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1069) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_f_inl0_hrd0/runTest.exe @@ -166,14 +166,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=3, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.844251e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.835523e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.835523e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.872428e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.874512e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.874512e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371884e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.144052 sec - 7,072,279,961 cycles:u # 2.243 GHz - 10,374,744,586 instructions:u # 1.47 insn per cycle - 3.156454506 seconds time elapsed +TOTAL : 3.123025 sec + 7,019,873,084 cycles:u # 2.241 GHz + 10,374,744,543 instructions:u # 1.48 insn per cycle + 3.135138770 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 387) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index a1e8638f35..651605ce98 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_05:02:39 +DATE: 2022-12-18_09:14:15 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.411169e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.356407e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.762393e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.386848e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.353078e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.776283e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.903918 sec - 1,143,845,314 cycles:u # 1.010 GHz - 3,429,110,504 instructions:u # 3.00 insn per cycle - 1.191274494 seconds time elapsed +TOTAL : 0.904525 sec + 1,148,113,342 cycles:u # 1.013 GHz + 3,431,461,035 instructions:u # 2.99 insn per cycle + 1.191551641 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,14 +66,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.987139e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.319770e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.319770e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.000387e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322646e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322646e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.724123 sec - 17,878,272,421 cycles:u # 2.655 GHz - 41,799,944,541 instructions:u # 2.34 insn per cycle - 6.736478637 seconds time elapsed +TOTAL : 6.715348 sec + 17,872,685,918 cycles:u # 2.658 GHz + 41,749,612,916 instructions:u # 2.34 insn per cycle + 6.727652669 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 355) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_f_inl0_hrd0/runTest.exe @@ -91,14 +91,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.294894e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.512676e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.512676e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.295344e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.513632e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.513632e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270375e-06 ) GeV^0 -TOTAL : 3.167802 sec - 8,373,937,598 cycles:u # 2.636 GHz - 18,527,618,390 instructions:u # 2.21 insn per cycle - 3.179926937 seconds time elapsed +TOTAL : 3.168425 sec + 8,383,020,316 cycles:u # 2.638 GHz + 18,527,618,241 instructions:u # 2.21 insn per cycle + 3.180744122 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,14 +116,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.885814e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.014588e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.014588e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.884002e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.015236e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.015236e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.603790 sec - 6,617,055,830 cycles:u # 2.533 GHz - 12,423,013,068 instructions:u # 1.88 insn per cycle - 2.615762373 seconds time elapsed +TOTAL : 2.604298 sec + 6,620,993,672 cycles:u # 2.534 GHz + 12,423,012,855 instructions:u # 1.88 insn per cycle + 2.616608583 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1123) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_f_inl0_hrd0/runTest.exe @@ -141,14 +141,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.900263e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.074214e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.074214e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.927635e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.080117e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.080117e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.584496 sec - 6,581,070,587 cycles:u # 2.537 GHz - 12,231,125,755 instructions:u # 1.86 insn per cycle - 2.597386058 seconds time elapsed +TOTAL : 2.565143 sec + 6,536,739,018 cycles:u # 2.539 GHz + 12,231,125,627 instructions:u # 1.87 insn per cycle + 2.577475952 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1069) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_f_inl0_hrd0/runTest.exe @@ -166,14 +166,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.851863e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.001296e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.001296e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.873766e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.852192e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.852192e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270340e-06 ) GeV^0 -TOTAL : 2.628157 sec - 6,235,934,613 cycles:u # 2.365 GHz - 10,694,025,923 instructions:u # 1.71 insn per cycle - 2.640158965 seconds time elapsed +TOTAL : 2.613235 sec + 6,191,219,017 cycles:u # 2.360 GHz + 10,694,025,955 instructions:u # 1.73 insn per cycle + 2.626304477 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 387) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index f4375487c7..d76251af22 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_04:59:03 +DATE: 2022-12-18_09:10:40 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.778116e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.299176e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.563080e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.805381e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.304805e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.586968e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371709e-02 +- 3.270378e-06 ) GeV^0 -TOTAL : 1.767382 sec - 3,473,533,578 cycles:u # 1.739 GHz - 6,879,628,670 instructions:u # 1.98 insn per cycle - 2.055661833 seconds time elapsed +TOTAL : 1.762372 sec + 3,470,303,529 cycles:u # 1.732 GHz + 6,879,411,469 instructions:u # 1.98 insn per cycle + 2.061317666 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -67,14 +67,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.985534e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.319070e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.319070e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.998991e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.321832e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.321832e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.726832 sec - 17,881,915,541 cycles:u # 2.655 GHz - 41,799,944,677 instructions:u # 2.34 insn per cycle - 6.738957507 seconds time elapsed +TOTAL : 6.719857 sec + 17,875,290,098 cycles:u # 2.656 GHz + 41,749,613,190 instructions:u # 2.34 insn per cycle + 6.732639943 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 355) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_f_inl0_hrd0/runTest.exe @@ -92,14 +92,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.296884e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.513787e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.513787e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.297290e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.524329e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.524329e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270375e-06 ) GeV^0 -TOTAL : 3.165891 sec - 8,368,159,796 cycles:u # 2.636 GHz - 18,527,618,383 instructions:u # 2.21 insn per cycle - 3.177875676 seconds time elapsed +TOTAL : 3.166733 sec + 8,377,708,192 cycles:u # 2.638 GHz + 18,527,618,392 instructions:u # 2.21 insn per cycle + 3.179103366 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_f_inl0_hrd0/runTest.exe @@ -117,14 +117,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.877310e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.013051e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.013051e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.878168e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.009006e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.009006e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.610494 sec - 6,631,938,516 cycles:u # 2.533 GHz - 12,423,013,087 instructions:u # 1.87 insn per cycle - 2.622516580 seconds time elapsed +TOTAL : 2.609590 sec + 6,637,286,362 cycles:u # 2.534 GHz + 12,423,013,047 instructions:u # 1.87 insn per cycle + 2.622215983 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1123) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,14 +142,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.928548e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.079582e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.079582e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.920645e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.076617e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.076617e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.563087 sec - 6,525,896,763 cycles:u # 2.537 GHz - 12,231,126,006 instructions:u # 1.87 insn per cycle - 2.575269391 seconds time elapsed +TOTAL : 2.570729 sec + 6,549,244,280 cycles:u # 2.539 GHz + 12,231,125,863 instructions:u # 1.87 insn per cycle + 2.583073330 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1069) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_f_inl0_hrd0/runTest.exe @@ -167,14 +167,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.884257e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.903132e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.903132e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.894047e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.000332e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000332e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270340e-06 ) GeV^0 -TOTAL : 2.604788 sec - 6,168,222,634 cycles:u # 2.360 GHz - 10,694,026,092 instructions:u # 1.73 insn per cycle - 2.616885986 seconds time elapsed +TOTAL : 2.596584 sec + 6,160,693,669 cycles:u # 2.364 GHz + 10,694,026,065 instructions:u # 1.74 insn per cycle + 2.609178252 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 387) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index e93ff8379a..4a228b5f17 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_04:14:58 +DATE: 2022-12-18_08:26:58 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.331230e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.347356e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.898304e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.271529e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.342896e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.896529e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.597349 sec - 330,668,282 cycles:u # 0.394 GHz - 690,303,411 instructions:u # 2.09 insn per cycle - 1.038997588 seconds time elapsed +TOTAL : 0.599076 sec + 331,915,260 cycles:u # 0.397 GHz + 695,607,322 instructions:u # 2.10 insn per cycle + 0.940616627 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 80 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,14 +66,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.928511e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.315217e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.315217e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.946473e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.315604e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.315604e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.763966 sec - 17,977,382,451 cycles:u # 2.654 GHz - 41,748,823,891 instructions:u # 2.32 insn per cycle - 6.797619304 seconds time elapsed +TOTAL : 6.755228 sec + 17,966,306,762 cycles:u # 2.656 GHz + 41,698,492,352 instructions:u # 2.32 insn per cycle + 6.780944664 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 338) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_f_inl0_hrd1/runTest.exe @@ -91,14 +91,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.277623e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.527895e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.527895e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.287496e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.519316e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.519316e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270375e-06 ) GeV^0 -TOTAL : 3.189522 sec - 8,432,768,586 cycles:u # 2.636 GHz - 18,451,333,182 instructions:u # 2.19 insn per cycle - 3.248291357 seconds time elapsed +TOTAL : 3.180975 sec + 8,413,258,905 cycles:u # 2.636 GHz + 18,451,333,464 instructions:u # 2.19 insn per cycle + 3.201894632 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1326) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_f_inl0_hrd1/runTest.exe @@ -116,14 +116,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.876586e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.014480e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.014480e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.879459e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.018846e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.018846e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.605233 sec - 6,622,895,466 cycles:u # 2.533 GHz - 12,409,639,823 instructions:u # 1.87 insn per cycle - 2.685396660 seconds time elapsed +TOTAL : 2.607098 sec + 6,631,484,642 cycles:u # 2.533 GHz + 12,409,640,890 instructions:u # 1.87 insn per cycle + 2.634310282 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1093) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_f_inl0_hrd1/runTest.exe @@ -141,14 +141,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.920370e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.076598e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.076598e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.909752e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.077072e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.077072e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.569077 sec - 6,535,087,174 cycles:u # 2.535 GHz - 12,224,044,464 instructions:u # 1.87 insn per cycle - 2.633981557 seconds time elapsed +TOTAL : 2.581604 sec + 6,574,245,715 cycles:u # 2.536 GHz + 12,224,045,783 instructions:u # 1.86 insn per cycle + 2.616475739 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1039) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_f_inl0_hrd1/runTest.exe @@ -166,14 +166,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.945065e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.066922e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.066922e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.927958e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.064308e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.064308e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270340e-06 ) GeV^0 -TOTAL : 2.554592 sec - 6,088,720,878 cycles:u # 2.374 GHz - 10,620,097,460 instructions:u # 1.74 insn per cycle - 2.592826157 seconds time elapsed +TOTAL : 2.572807 sec + 6,140,939,336 cycles:u # 2.377 GHz + 10,620,098,446 instructions:u # 1.73 insn per cycle + 2.588385189 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 354) (512y: 0) (512z: 678) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index d84ca10915..30de346bce 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_04:41:15 +DATE: 2022-12-18_08:52:52 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.245412e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.320858e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.779091e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.274211e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.321318e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.779291e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.600735 sec - 334,827,643 cycles:u # 0.400 GHz - 700,759,864 instructions:u # 2.09 insn per cycle - 0.896253341 seconds time elapsed +TOTAL : 0.449731 sec + 333,598,126 cycles:u # 0.580 GHz + 698,773,423 instructions:u # 2.09 insn per cycle + 0.746645126 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.145813e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.587662e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.587662e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.150038e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.570705e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.570705e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.363868 sec - 8,890,764,643 cycles:u # 2.635 GHz - 18,496,148,341 instructions:u # 2.08 insn per cycle - 3.377503821 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 217) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.360209 sec + 8,885,764,777 cycles:u # 2.636 GHz + 18,571,645,825 instructions:u # 2.09 insn per cycle + 3.373726733 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 220) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -91,14 +91,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.971065e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.241429e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.241429e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.975099e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.248143e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.248143e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270375e-06 ) GeV^0 -TOTAL : 2.548248 sec - 6,711,500,317 cycles:u # 2.623 GHz - 12,959,617,114 instructions:u # 1.93 insn per cycle - 2.562128243 seconds time elapsed +TOTAL : 2.545980 sec + 6,706,930,062 cycles:u # 2.623 GHz + 12,959,617,352 instructions:u # 1.93 insn per cycle + 2.559726206 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 520) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_f_inl1_hrd0/runTest.exe @@ -116,14 +116,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.416193e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.348220e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.348220e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.424016e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.344147e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.344147e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.266109 sec - 5,852,083,982 cycles:u # 2.570 GHz - 10,471,062,024 instructions:u # 1.79 insn per cycle - 2.279783049 seconds time elapsed +TOTAL : 2.262389 sec + 5,845,609,279 cycles:u # 2.572 GHz + 10,471,062,140 instructions:u # 1.79 insn per cycle + 2.275865272 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 513) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_f_inl1_hrd0/runTest.exe @@ -141,14 +141,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.413518e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.323576e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.323576e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.411379e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.323988e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.323988e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.263011 sec - 5,846,234,200 cycles:u # 2.571 GHz - 10,371,974,779 instructions:u # 1.77 insn per cycle - 2.276377105 seconds time elapsed +TOTAL : 2.266376 sec + 5,847,125,683 cycles:u # 2.570 GHz + 10,371,974,824 instructions:u # 1.77 insn per cycle + 2.279782928 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 477) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_f_inl1_hrd0/runTest.exe @@ -166,14 +166,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.306617e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.815269e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.815269e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.329228e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.858820e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.858820e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270340e-06 ) GeV^0 -TOTAL : 2.328117 sec - 5,743,113,717 cycles:u # 2.456 GHz - 9,972,856,732 instructions:u # 1.74 insn per cycle - 2.341729148 seconds time elapsed +TOTAL : 2.314465 sec + 5,719,820,933 cycles:u # 2.460 GHz + 9,972,856,907 instructions:u # 1.74 insn per cycle + 2.327986933 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 321) (512y: 0) (512z: 301) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 910f000d46..a1af249be0 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll' -DATE: 2022-12-17_04:41:42 +DATE: 2022-12-18_08:53:19 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.259319e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.339375e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.898049e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.275293e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.342012e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.912033e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.601857 sec - 330,683,494 cycles:u # 0.394 GHz - 695,376,231 instructions:u # 2.10 insn per cycle - 0.897389997 seconds time elapsed +TOTAL : 0.601058 sec + 333,961,226 cycles:u # 0.398 GHz + 693,199,258 instructions:u # 2.08 insn per cycle + 0.899750012 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 80 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.720079e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.185350e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.185350e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.734197e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.296970e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.296970e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.744680 sec - 7,234,831,910 cycles:u # 2.626 GHz - 14,695,281,109 instructions:u # 2.03 insn per cycle - 2.758413512 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 178) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.730863 sec + 7,204,355,004 cycles:u # 2.628 GHz + 14,745,613,315 instructions:u # 2.05 insn per cycle + 2.744565213 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 181) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -91,14 +91,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.221008e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.919870e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.919870e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.245591e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.923303e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.923303e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270375e-06 ) GeV^0 -TOTAL : 2.385312 sec - 6,275,756,609 cycles:u # 2.619 GHz - 11,760,296,563 instructions:u # 1.87 insn per cycle - 2.399122119 seconds time elapsed +TOTAL : 2.367804 sec + 6,237,728,423 cycles:u # 2.623 GHz + 11,760,296,520 instructions:u # 1.89 insn per cycle + 2.381048792 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 379) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.sse4_f_inl1_hrd1/runTest.exe @@ -116,14 +116,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.570549e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.280031e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.280031e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.521396e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.300332e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.300332e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.187399 sec - 5,672,209,960 cycles:u # 2.580 GHz - 10,113,230,318 instructions:u # 1.78 insn per cycle - 2.201285872 seconds time elapsed +TOTAL : 2.211083 sec + 5,741,757,760 cycles:u # 2.585 GHz + 10,113,230,251 instructions:u # 1.76 insn per cycle + 2.224435666 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 400) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.avx2_f_inl1_hrd1/runTest.exe @@ -141,14 +141,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.478321e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.272145e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.272145e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.569534e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.288490e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.288490e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.231453 sec - 5,788,957,515 cycles:u # 2.582 GHz - 10,084,136,359 instructions:u # 1.74 insn per cycle - 2.245138406 seconds time elapsed +TOTAL : 2.185337 sec + 5,671,953,208 cycles:u # 2.583 GHz + 10,084,136,576 instructions:u # 1.78 insn per cycle + 2.198814761 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 381) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512y_f_inl1_hrd1/runTest.exe @@ -166,14 +166,14 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=5, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.387889e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.137183e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.137183e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.418987e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.179546e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.179546e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270340e-06 ) GeV^0 -TOTAL : 2.285846 sec - 5,682,132,334 cycles:u # 2.476 GHz - 9,808,489,724 instructions:u # 1.73 insn per cycle - 2.299527484 seconds time elapsed +TOTAL : 2.267173 sec + 5,649,992,204 cycles:u # 2.481 GHz + 9,808,489,828 instructions:u # 1.74 insn per cycle + 2.280478973 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 274) (512y: 0) (512z: 234) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_ll_ll/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 2c5fe3c1d5..e7fbbcd9ff 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_04:15:31 +DATE: 2022-12-18_08:27:31 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.527470e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156276e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273946e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.519405e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.158438e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276169e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.616706 sec - 163,061,041 cycles:u # 0.206 GHz - 237,906,138 instructions:u # 1.46 insn per cycle - 1.228142011 seconds time elapsed +TOTAL : 0.578366 sec + 165,022,039 cycles:u # 0.209 GHz + 236,598,205 instructions:u # 1.43 insn per cycle + 1.016472311 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.669310e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.759532e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.759532e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.669202e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.759360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.759360e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 6.437294 sec - 17,017,815,531 cycles:u # 2.637 GHz - 45,796,589,562 instructions:u # 2.69 insn per cycle - 6.483209996 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 611) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.439676 sec + 17,031,351,516 cycles:u # 2.639 GHz + 45,789,250,335 instructions:u # 2.69 insn per cycle + 6.474541855 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 612) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -91,14 +91,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.979137e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.290468e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.290468e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.986830e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.299897e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.299897e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.676243 sec - 9,639,308,254 cycles:u # 2.610 GHz - 27,017,549,220 instructions:u # 2.80 insn per cycle - 3.711153748 seconds time elapsed +TOTAL : 3.686782 sec + 9,627,929,615 cycles:u # 2.613 GHz + 27,017,024,729 instructions:u # 2.81 insn per cycle + 3.721470733 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2488) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,15 +116,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.588997e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.358431e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.358431e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.617024e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.395375e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.395375e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.439677 sec - 5,557,077,512 cycles:u # 2.263 GHz - 11,656,145,901 instructions:u # 2.10 insn per cycle - 2.474480778 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2289) (512y: 0) (512z: 0) +TOTAL : 2.444263 sec + 5,536,890,334 cycles:u # 2.265 GHz + 11,655,360,250 instructions:u # 2.11 insn per cycle + 2.464756762 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2305) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -141,15 +141,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.001020e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.921491e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.921491e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.009642e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.941923e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.941923e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.252165 sec - 5,128,286,628 cycles:u # 2.261 GHz - 11,064,218,581 instructions:u # 2.16 insn per cycle - 2.338540450 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2078) (512y: 86) (512z: 0) +TOTAL : 2.264422 sec + 5,130,994,744 cycles:u # 2.264 GHz + 11,063,694,468 instructions:u # 2.16 insn per cycle + 2.285285260 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2094) (512y: 86) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.386102e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.785418e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.785418e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.383452e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.786507e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.786507e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.254427 sec - 5,473,392,107 cycles:u # 1.674 GHz - 7,390,410,444 instructions:u # 1.35 insn per cycle - 3.321609547 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1076) (512y: 98) (512z: 1622) +TOTAL : 3.267880 sec + 5,490,866,395 cycles:u # 1.677 GHz + 7,389,755,362 instructions:u # 1.35 insn per cycle + 3.289467584 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1073) (512y: 98) (512z: 1622) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index e95f599844..157958b88e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_04:52:21 +DATE: 2022-12-18_09:03:59 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +47,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.543915e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.830140e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.830140e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.527264e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.730361e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.730361e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.009371 sec - 1,290,928,613 cycles:u # 1.021 GHz - 2,042,413,942 instructions:u # 1.58 insn per cycle - 1.323222789 seconds time elapsed +TOTAL : 1.014189 sec + 1,298,982,345 cycles:u # 1.016 GHz + 2,045,247,418 instructions:u # 1.57 insn per cycle + 1.336718629 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -71,15 +71,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.661579e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.750642e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.750642e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.660844e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.749688e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.749688e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 6.587620 sec - 17,256,585,640 cycles:u # 2.610 GHz - 45,890,935,520 instructions:u # 2.66 insn per cycle - 6.615169976 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 611) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.594153 sec + 17,279,579,547 cycles:u # 2.610 GHz + 45,883,595,407 instructions:u # 2.66 insn per cycle + 6.623053343 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 612) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -97,14 +97,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.950732e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.254176e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.254176e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.955916e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.261768e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.261768e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.833064 sec - 9,888,652,452 cycles:u # 2.563 GHz - 27,203,646,633 instructions:u # 2.75 insn per cycle - 3.861026859 seconds time elapsed +TOTAL : 3.829921 sec + 9,884,806,766 cycles:u # 2.564 GHz + 27,203,122,279 instructions:u # 2.75 insn per cycle + 3.858558112 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2488) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -123,15 +123,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.534314e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.277549e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.277549e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.540895e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.286789e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.286789e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.590827 sec - 5,798,345,808 cycles:u # 2.217 GHz - 11,945,790,852 instructions:u # 2.06 insn per cycle - 2.618401442 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2289) (512y: 0) (512z: 0) +TOTAL : 2.588692 sec + 5,792,164,684 cycles:u # 2.216 GHz + 11,945,004,588 instructions:u # 2.06 insn per cycle + 2.617528628 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2305) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -149,15 +149,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.930706e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.817249e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.817249e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.936649e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.829593e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.829593e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.404733 sec - 5,376,965,168 cycles:u # 2.213 GHz - 11,353,863,242 instructions:u # 2.11 insn per cycle - 2.432434159 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2078) (512y: 86) (512z: 0) +TOTAL : 2.403534 sec + 5,374,372,157 cycles:u # 2.212 GHz + 11,353,338,684 instructions:u # 2.11 insn per cycle + 2.432477813 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2094) (512y: 86) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -175,15 +175,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.349005e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.736709e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.736709e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.339883e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.728986e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.728986e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.412184 sec - 5,735,815,041 cycles:u # 1.669 GHz - 7,637,129,212 instructions:u # 1.33 insn per cycle - 3.439847606 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1076) (512y: 98) (512z: 1622) +TOTAL : 3.423313 sec + 5,761,014,223 cycles:u # 1.670 GHz + 7,636,473,480 instructions:u # 1.33 insn per cycle + 3.452336285 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1073) (512y: 98) (512z: 1622) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index c39df49c15..f8e7b3c4ee 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_05:06:51 +DATE: 2022-12-18_09:18:26 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.850958e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.169575e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.272864e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.642912e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.163665e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274857e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.667074 sec - 424,382,087 cycles:u # 0.465 GHz - 680,116,809 instructions:u # 1.60 insn per cycle - 0.970978666 seconds time elapsed +TOTAL : 0.670445 sec + 426,132,363 cycles:u # 0.464 GHz + 682,927,090 instructions:u # 1.60 insn per cycle + 0.976547053 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.670561e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.761065e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.761065e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.671304e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.761524e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.761524e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 6.519616 sec - 17,151,052,725 cycles:u # 2.626 GHz - 45,814,797,844 instructions:u # 2.67 insn per cycle - 6.536741550 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 611) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.518497 sec + 17,165,369,595 cycles:u # 2.627 GHz + 45,807,457,847 instructions:u # 2.67 insn per cycle + 6.536278735 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 612) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -91,14 +91,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.982761e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.291979e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.291979e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.985411e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.295853e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.295853e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.756447 sec - 9,773,443,307 cycles:u # 2.593 GHz - 27,018,875,400 instructions:u # 2.76 insn per cycle - 3.773783024 seconds time elapsed +TOTAL : 3.754404 sec + 9,776,703,265 cycles:u # 2.594 GHz + 27,018,350,987 instructions:u # 2.76 insn per cycle + 3.772223764 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2488) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,15 +116,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.599769e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.368401e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.368401e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.604927e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.375808e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.375808e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.519368 sec - 5,694,437,053 cycles:u # 2.248 GHz - 11,640,793,193 instructions:u # 2.04 insn per cycle - 2.536390745 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2289) (512y: 0) (512z: 0) +TOTAL : 2.519884 sec + 5,698,313,481 cycles:u # 2.248 GHz + 11,640,007,180 instructions:u # 2.04 insn per cycle + 2.537973147 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2305) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -141,15 +141,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.031965e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.959788e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.959788e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.037066e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.967704e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.967704e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.324601 sec - 5,254,522,343 cycles:u # 2.247 GHz - 11,015,311,024 instructions:u # 2.10 insn per cycle - 2.341718520 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2078) (512y: 86) (512z: 0) +TOTAL : 2.325994 sec + 5,259,156,590 cycles:u # 2.247 GHz + 11,014,786,628 instructions:u # 2.09 insn per cycle + 2.343645730 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2094) (512y: 86) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.387871e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.784451e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.784451e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.382918e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.782553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.782553e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.337638 sec - 5,625,993,391 cycles:u # 1.679 GHz - 7,341,502,914 instructions:u # 1.30 insn per cycle - 3.354837574 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1076) (512y: 98) (512z: 1622) +TOTAL : 3.344568 sec + 5,636,916,583 cycles:u # 1.678 GHz + 7,340,847,814 instructions:u # 1.30 insn per cycle + 3.362326044 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1073) (512y: 98) (512z: 1622) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index 877dd6de08..21b1d52574 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_05:03:11 +DATE: 2022-12-18_09:14:47 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.699745e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.164021e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.271159e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.683926e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.163410e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274972e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.580019 sec - 270,282,708 cycles:u # 0.330 GHz - 663,992,659 instructions:u # 2.46 insn per cycle - 0.877541081 seconds time elapsed +TOTAL : 0.579956 sec + 270,274,619 cycles:u # 0.330 GHz + 663,503,044 instructions:u # 2.45 insn per cycle + 0.878681882 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.670234e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.760846e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.760846e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.669811e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.759527e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.759527e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 6.430739 sec - 17,003,177,571 cycles:u # 2.638 GHz - 45,796,591,310 instructions:u # 2.69 insn per cycle - 6.448298838 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 611) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.433806 sec + 17,025,157,745 cycles:u # 2.640 GHz + 45,789,250,949 instructions:u # 2.69 insn per cycle + 6.451658482 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 612) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -91,14 +91,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.980994e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.292039e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.292039e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.980056e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.288770e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.288770e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.670635 sec - 9,631,411,812 cycles:u # 2.614 GHz - 27,017,549,911 instructions:u # 2.81 insn per cycle - 3.688335884 seconds time elapsed +TOTAL : 3.673616 sec + 9,645,055,614 cycles:u # 2.615 GHz + 27,017,025,699 instructions:u # 2.80 insn per cycle + 3.691680940 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2488) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,15 +116,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.613489e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.387225e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.387225e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.595925e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.365320e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.365320e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.425739 sec - 5,531,049,296 cycles:u # 2.267 GHz - 11,656,147,499 instructions:u # 2.11 insn per cycle - 2.443456135 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2289) (512y: 0) (512z: 0) +TOTAL : 2.434561 sec + 5,557,641,564 cycles:u # 2.269 GHz + 11,655,361,191 instructions:u # 2.10 insn per cycle + 2.452425388 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2305) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -141,15 +141,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.017766e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.941625e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.941625e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.010362e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.929826e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.929826e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.243058 sec - 5,113,433,256 cycles:u # 2.266 GHz - 11,064,219,420 instructions:u # 2.16 insn per cycle - 2.260927065 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2078) (512y: 86) (512z: 0) +TOTAL : 2.246772 sec + 5,128,267,056 cycles:u # 2.267 GHz + 11,063,695,271 instructions:u # 2.16 insn per cycle + 2.264741940 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2094) (512y: 86) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.385410e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.781444e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.781444e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.389746e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.787835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.787835e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.250292 sec - 5,473,733,215 cycles:u # 1.677 GHz - 7,390,411,284 instructions:u # 1.35 insn per cycle - 3.267935952 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1076) (512y: 98) (512z: 1622) +TOTAL : 3.247323 sec + 5,473,954,471 cycles:u # 1.678 GHz + 7,389,755,849 instructions:u # 1.35 insn per cycle + 3.265390057 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1073) (512y: 98) (512z: 1622) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index d25a844ed4..462dac894f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_04:59:37 +DATE: 2022-12-18_09:11:13 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.938378e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.158225e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270144e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.924703e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.161092e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.272598e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.903222 sec - 1,154,368,814 cycles:u # 1.009 GHz - 1,881,928,655 instructions:u # 1.63 insn per cycle - 1.201664897 seconds time elapsed +TOTAL : 0.908992 sec + 1,159,448,589 cycles:u # 0.998 GHz + 1,880,953,888 instructions:u # 1.62 insn per cycle + 1.219717390 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -67,15 +67,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.669878e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.759613e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.759613e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.669780e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.759473e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.759473e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 6.432107 sec - 16,995,893,194 cycles:u # 2.638 GHz - 45,796,591,398 instructions:u # 2.69 insn per cycle - 6.449535953 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 611) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.433593 sec + 17,020,370,840 cycles:u # 2.641 GHz + 45,789,251,147 instructions:u # 2.69 insn per cycle + 6.451717627 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 612) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -92,14 +92,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.981586e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.292425e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.292425e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.989445e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.300656e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.300656e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.669921 sec - 9,630,465,181 cycles:u # 2.614 GHz - 27,017,550,031 instructions:u # 2.81 insn per cycle - 3.687374549 seconds time elapsed +TOTAL : 3.661526 sec + 9,619,918,279 cycles:u # 2.617 GHz + 27,017,025,621 instructions:u # 2.81 insn per cycle + 3.679384253 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2488) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -117,15 +117,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.617658e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.390878e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.390878e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.618079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.390561e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.390561e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.423367 sec - 5,525,107,062 cycles:u # 2.266 GHz - 11,656,147,584 instructions:u # 2.11 insn per cycle - 2.441010774 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2289) (512y: 0) (512z: 0) +TOTAL : 2.424065 sec + 5,530,661,985 cycles:u # 2.268 GHz + 11,655,361,293 instructions:u # 2.11 insn per cycle + 2.442296708 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2305) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -142,15 +142,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.971491e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.875767e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.875767e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.021248e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.953284e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.953284e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.262468 sec - 5,151,147,785 cycles:u # 2.265 GHz - 11,064,219,750 instructions:u # 2.15 insn per cycle - 2.279934079 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2078) (512y: 86) (512z: 0) +TOTAL : 2.241729 sec + 5,119,903,814 cycles:u # 2.269 GHz + 11,063,695,663 instructions:u # 2.16 insn per cycle + 2.259827329 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2094) (512y: 86) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -167,15 +167,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.378547e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.776375e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.776375e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.386741e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.784453e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.784453e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.256861 sec - 5,476,617,388 cycles:u # 1.676 GHz - 7,390,411,753 instructions:u # 1.35 insn per cycle - 3.274465637 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1076) (512y: 98) (512z: 1622) +TOTAL : 3.251601 sec + 5,479,258,680 cycles:u # 1.678 GHz + 7,389,756,055 instructions:u # 1.35 insn per cycle + 3.269758855 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1073) (512y: 98) (512z: 1622) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 3e1f67caa4..22d7b7b706 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_04:16:06 +DATE: 2022-12-18_08:28:04 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.508351e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156331e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273754e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.492768e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.158885e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275084e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.541149 sec - 164,675,992 cycles:u # 0.206 GHz - 238,259,225 instructions:u # 1.45 insn per cycle - 0.983205689 seconds time elapsed +TOTAL : 0.540552 sec + 163,621,756 cycles:u # 0.207 GHz + 237,944,842 instructions:u # 1.45 insn per cycle + 0.906358723 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.712077e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.807087e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.807087e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.709284e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.803993e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.803993e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 6.281375 sec - 16,604,629,696 cycles:u # 2.637 GHz - 44,772,373,259 instructions:u # 2.70 insn per cycle - 6.336323075 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 562) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.291229 sec + 16,641,990,944 cycles:u # 2.638 GHz + 44,765,034,016 instructions:u # 2.69 insn per cycle + 6.343165981 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 563) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -91,14 +91,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.147600e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.496806e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.496806e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.147058e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.495284e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.495284e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.487098 sec - 9,136,253,445 cycles:u # 2.608 GHz - 25,785,386,820 instructions:u # 2.82 insn per cycle - 3.542214144 seconds time elapsed +TOTAL : 3.488632 sec + 9,149,028,647 cycles:u # 2.610 GHz + 25,784,862,913 instructions:u # 2.82 insn per cycle + 3.531657340 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2305) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe @@ -116,15 +116,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.236889e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.884938e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.884938e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.244261e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.896277e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.896277e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.629966 sec - 5,988,763,450 cycles:u # 2.263 GHz - 12,762,848,343 instructions:u # 2.13 insn per cycle - 2.699877036 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2381) (512y: 0) (512z: 0) +TOTAL : 2.625894 sec + 5,989,073,967 cycles:u # 2.266 GHz + 12,762,061,730 instructions:u # 2.13 insn per cycle + 2.652903906 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2397) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -141,15 +141,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.408103e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.107455e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.107455e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.427892e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.133955e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.133955e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.533538 sec - 5,773,388,648 cycles:u # 2.264 GHz - 12,334,500,918 instructions:u # 2.14 insn per cycle - 2.571425604 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2091) (512y: 241) (512z: 0) +TOTAL : 2.523477 sec + 5,756,512,894 cycles:u # 2.266 GHz + 12,333,976,546 instructions:u # 2.14 insn per cycle + 2.557816931 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2107) (512y: 241) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.268572e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.638330e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.638330e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.274362e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.645463e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.645463e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.364523 sec - 5,646,374,156 cycles:u # 1.671 GHz - 8,745,178,073 instructions:u # 1.55 insn per cycle - 3.402610249 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1013) (512y: 204) (512z: 1700) +TOTAL : 3.358897 sec + 5,644,925,261 cycles:u # 1.672 GHz + 8,744,522,684 instructions:u # 1.55 insn per cycle + 3.390662017 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1010) (512y: 204) (512z: 1700) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index f7c602f943..20c0795fb6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_04:42:08 +DATE: 2022-12-18_08:53:45 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.510558e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.157009e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273398e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.523736e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155775e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274212e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.541512 sec - 164,369,135 cycles:u # 0.206 GHz - 236,948,127 instructions:u # 1.44 insn per cycle - 0.857664627 seconds time elapsed +TOTAL : 0.542401 sec + 165,464,966 cycles:u # 0.207 GHz + 238,400,711 instructions:u # 1.44 insn per cycle + 0.858901912 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.198061e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.360512e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.360512e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.199052e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.359687e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359687e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.929036 sec - 12,982,653,979 cycles:u # 2.625 GHz - 34,748,616,795 instructions:u # 2.68 insn per cycle - 4.948670662 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 658) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.926715 sec + 12,981,243,281 cycles:u # 2.627 GHz + 34,749,665,158 instructions:u # 2.68 insn per cycle + 4.946485965 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 659) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -91,15 +91,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.691461e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.942933e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.942933e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.695864e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.948627e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.948627e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.054344 sec - 10,648,505,335 cycles:u # 2.616 GHz - 23,086,584,879 instructions:u # 2.17 insn per cycle - 4.074163439 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2532) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.048966 sec + 10,641,790,581 cycles:u # 2.617 GHz + 23,087,109,487 instructions:u # 2.17 insn per cycle + 4.069169670 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +116,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.196143e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.833968e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.833968e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.200754e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.836762e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.836762e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.657098 sec - 6,050,127,552 cycles:u # 2.262 GHz - 11,192,670,405 instructions:u # 1.85 insn per cycle - 2.677110007 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2716) (512y: 0) (512z: 0) +TOTAL : 2.654527 sec + 6,049,451,269 cycles:u # 2.264 GHz + 11,191,884,071 instructions:u # 1.85 insn per cycle + 2.674633464 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2715) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -141,15 +141,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.250091e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.902760e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.902760e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.263011e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.916034e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.916034e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.624730 sec - 5,977,158,388 cycles:u # 2.263 GHz - 10,311,593,171 instructions:u # 1.73 insn per cycle - 2.644646158 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2308) (512y: 162) (512z: 0) +TOTAL : 2.617735 sec + 5,965,672,415 cycles:u # 2.264 GHz + 10,310,806,983 instructions:u # 1.73 insn per cycle + 2.637503450 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2307) (512y: 162) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.538999e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.979162e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.979162e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.531983e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.971924e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.971924e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.122540 sec - 5,269,631,552 cycles:u # 1.679 GHz - 7,904,808,246 instructions:u # 1.50 insn per cycle - 3.142110927 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1618) (512y: 257) (512z: 1640) +TOTAL : 3.127532 sec + 5,278,472,320 cycles:u # 1.679 GHz + 7,904,152,910 instructions:u # 1.50 insn per cycle + 3.147372890 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1615) (512y: 257) (512z: 1640) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index d9e81d2c4c..dbc5db7b2a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_04:42:40 +DATE: 2022-12-18_08:54:17 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.554475e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156203e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.274680e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.510379e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.157821e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275876e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.544810 sec - 163,614,232 cycles:u # 0.206 GHz - 236,501,785 instructions:u # 1.45 insn per cycle - 0.852342652 seconds time elapsed +TOTAL : 0.541919 sec + 164,194,949 cycles:u # 0.205 GHz + 237,782,552 instructions:u # 1.45 insn per cycle + 0.859419201 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.270756e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.441061e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.441061e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.264555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.433836e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.433836e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.775311 sec - 12,578,115,286 cycles:u # 2.625 GHz - 35,402,149,058 instructions:u # 2.81 insn per cycle - 4.795692228 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 444) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.787807 sec + 12,541,933,812 cycles:u # 2.611 GHz + 35,403,197,868 instructions:u # 2.82 insn per cycle + 4.807443195 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 445) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -91,15 +91,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.707870e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.965300e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.965300e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.711708e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.967980e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.967980e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.030741 sec - 10,581,724,801 cycles:u # 2.615 GHz - 22,423,088,939 instructions:u # 2.12 insn per cycle - 4.050534964 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2345) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.025799 sec + 10,573,002,230 cycles:u # 2.615 GHz + 22,423,612,980 instructions:u # 2.12 insn per cycle + 4.045661726 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2344) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +116,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.599054e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.373607e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.373607e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.606714e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.380534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.380534e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.438292 sec - 5,552,929,891 cycles:u # 2.262 GHz - 10,726,313,537 instructions:u # 1.93 insn per cycle - 2.458318420 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2155) (512y: 0) (512z: 0) +TOTAL : 2.436451 sec + 5,545,019,054 cycles:u # 2.262 GHz + 10,725,527,140 instructions:u # 1.93 insn per cycle + 2.456468074 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2154) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -141,15 +141,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.495870e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.231211e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.231211e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.514346e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.253890e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.253890e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.489167 sec - 5,667,429,403 cycles:u # 2.262 GHz - 9,888,180,276 instructions:u # 1.74 insn per cycle - 2.508796682 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1833) (512y: 116) (512z: 0) +TOTAL : 2.479165 sec + 5,648,756,998 cycles:u # 2.264 GHz + 9,887,395,477 instructions:u # 1.75 insn per cycle + 2.499275077 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1832) (512y: 116) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.704819e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.188236e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.188236e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.706072e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.190650e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.190650e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.988904 sec - 5,055,834,223 cycles:u # 1.682 GHz - 7,698,997,502 instructions:u # 1.52 insn per cycle - 3.008657172 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1292) (512y: 193) (512z: 1354) +TOTAL : 2.987962 sec + 5,056,793,978 cycles:u # 1.683 GHz + 7,698,342,031 instructions:u # 1.52 insn per cycle + 3.007806021 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1289) (512y: 193) (512z: 1354) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index cfb0167ad7..b182338610 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_04:16:40 +DATE: 2022-12-18_08:28:37 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.010233e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.187743e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.672251e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.001841e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.221203e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.710335e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.496210 sec - 138,616,114 cycles:u # 0.187 GHz - 209,594,452 instructions:u # 1.51 insn per cycle - 0.935654892 seconds time elapsed +TOTAL : 0.592387 sec + 137,563,763 cycles:u # 0.185 GHz + 210,023,715 instructions:u # 1.53 insn per cycle + 0.919264041 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 119 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.744807e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.822556e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.822556e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.744118e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.822399e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.822399e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086785e+00 +- 3.413819e-03 ) GeV^0 -TOTAL : 6.118803 sec - 16,263,148,725 cycles:u # 2.653 GHz - 45,491,160,009 instructions:u # 2.80 insn per cycle - 6.175158841 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 616) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.121847 sec + 16,280,123,016 cycles:u # 2.655 GHz + 45,492,209,328 instructions:u # 2.79 insn per cycle + 6.154709847 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -91,14 +91,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.258139e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.777561e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.777561e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.263385e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.787217e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.787217e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086786e+00 +- 3.413831e-03 ) GeV^0 -TOTAL : 2.572696 sec - 6,781,421,134 cycles:u # 2.625 GHz - 16,968,561,658 instructions:u # 2.50 insn per cycle - 2.755363475 seconds time elapsed +TOTAL : 2.600709 sec + 6,776,311,698 cycles:u # 2.626 GHz + 16,967,775,345 instructions:u # 2.50 insn per cycle + 2.625690393 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3109) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,15 +116,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.993192e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.000305e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.000305e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.006859e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.000958e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000958e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.420473 sec - 3,284,035,962 cycles:u # 2.296 GHz - 7,306,231,985 instructions:u # 2.22 insn per cycle - 1.537555439 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2725) (512y: 0) (512z: 0) +TOTAL : 1.557520 sec + 3,282,881,290 cycles:u # 2.295 GHz + 7,305,838,893 instructions:u # 2.23 insn per cycle + 1.572720120 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2723) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -141,15 +141,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.423651e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.066306e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.066306e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.446908e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.070154e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.070154e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.353936 sec - 3,133,028,723 cycles:u # 2.295 GHz - 7,009,483,865 instructions:u # 2.24 insn per cycle - 1.415091389 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2574) (512y: 12) (512z: 0) +TOTAL : 1.431929 sec + 3,127,201,391 cycles:u # 2.298 GHz + 7,009,090,682 instructions:u # 2.24 insn per cycle + 1.450965914 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2573) (512y: 12) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.415808e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.625859e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.625859e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.449707e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.671831e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.671831e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086782e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.744002 sec - 3,055,257,163 cycles:u # 1.742 GHz - 5,111,170,840 instructions:u # 1.67 insn per cycle - 1.893918679 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 22) (512z: 1840) +TOTAL : 1.822329 sec + 3,045,790,830 cycles:u # 1.744 GHz + 5,110,842,813 instructions:u # 1.68 insn per cycle + 1.841386143 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1708) (512y: 22) (512z: 1840) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 61c84239c9..fd3df236aa 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_04:52:55 +DATE: 2022-12-18_09:04:33 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +47,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.545644e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.247684e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.247684e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.546381e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.129882e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.129882e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086808e+00 +- 3.414087e-03 ) GeV^0 -TOTAL : 0.733481 sec - 709,988,686 cycles:u # 0.726 GHz - 1,326,560,910 instructions:u # 1.87 insn per cycle - 1.036913624 seconds time elapsed +TOTAL : 0.731479 sec + 708,899,997 cycles:u # 0.725 GHz + 1,327,928,721 instructions:u # 1.87 insn per cycle + 1.036875234 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 119 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -71,15 +71,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.737609e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.814606e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.814606e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.736963e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.814105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.814105e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086785e+00 +- 3.413819e-03 ) GeV^0 -TOTAL : 6.208862 sec - 16,396,807,965 cycles:u # 2.635 GHz - 45,542,938,494 instructions:u # 2.78 insn per cycle - 6.227286697 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 616) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.211552 sec + 16,418,152,225 cycles:u # 2.637 GHz + 45,543,987,413 instructions:u # 2.77 insn per cycle + 6.230708213 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -97,14 +97,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.221522e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.730456e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.730456e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.225164e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.736579e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.736579e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086786e+00 +- 3.413831e-03 ) GeV^0 -TOTAL : 2.661637 sec - 6,929,222,554 cycles:u # 2.588 GHz - 17,245,784,813 instructions:u # 2.49 insn per cycle - 2.680199957 seconds time elapsed +TOTAL : 2.659727 sec + 6,929,321,472 cycles:u # 2.590 GHz + 17,244,998,656 instructions:u # 2.49 insn per cycle + 2.678250398 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3109) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -123,15 +123,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.842975e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.765137e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.765137e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.880553e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.813557e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.813557e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.514771 sec - 3,444,876,735 cycles:u # 2.251 GHz - 7,540,529,626 instructions:u # 2.19 insn per cycle - 1.533506706 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2725) (512y: 0) (512z: 0) +TOTAL : 1.507249 sec + 3,433,787,750 cycles:u # 2.254 GHz + 7,540,136,145 instructions:u # 2.20 insn per cycle + 1.525982524 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2723) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -149,15 +149,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.268969e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.042366e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.042366e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.274674e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.043609e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.043609e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.445204 sec - 3,289,758,402 cycles:u # 2.252 GHz - 7,243,780,493 instructions:u # 2.20 insn per cycle - 1.463689066 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2574) (512y: 12) (512z: 0) +TOTAL : 1.445798 sec + 3,288,558,035 cycles:u # 2.251 GHz + 7,243,387,370 instructions:u # 2.20 insn per cycle + 1.464600435 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2573) (512y: 12) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -175,15 +175,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.336389e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.524279e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.524279e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.343946e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.518536e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.518536e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086782e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.834140 sec - 3,220,227,729 cycles:u # 1.741 GHz - 5,362,835,338 instructions:u # 1.67 insn per cycle - 1.852706019 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 22) (512z: 1840) +TOTAL : 1.832575 sec + 3,213,618,671 cycles:u # 1.739 GHz + 5,362,507,681 instructions:u # 1.67 insn per cycle + 1.851314932 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1708) (512y: 22) (512z: 1840) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 96cad08152..3604cb3dfd 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_05:07:24 +DATE: 2022-12-18_09:18:59 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.055481e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.215127e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.673216e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.044109e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.233363e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.699207e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.612435 sec - 381,234,642 cycles:u # 0.453 GHz - 654,111,821 instructions:u # 1.72 insn per cycle - 0.900180287 seconds time elapsed +TOTAL : 0.613180 sec + 381,239,124 cycles:u # 0.452 GHz + 655,039,349 instructions:u # 1.72 insn per cycle + 0.901111703 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 119 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.742362e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.821402e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.821402e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.743346e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.820760e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.820760e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079554e+00 +- 3.404558e-03 ) GeV^0 -TOTAL : 6.209558 sec - 16,414,180,337 cycles:u # 2.639 GHz - 45,516,071,173 instructions:u # 2.77 insn per cycle - 6.222124552 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 616) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.207225 sec + 16,413,626,009 cycles:u # 2.641 GHz + 45,517,119,996 instructions:u # 2.77 insn per cycle + 6.219938178 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -91,14 +91,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.261767e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.781821e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.781821e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.260979e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.784706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.784706e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079557e+00 +- 3.404577e-03 ) GeV^0 -TOTAL : 2.652537 sec - 6,911,207,440 cycles:u # 2.596 GHz - 16,976,731,129 instructions:u # 2.46 insn per cycle - 2.665008417 seconds time elapsed +TOTAL : 2.654500 sec + 6,916,143,693 cycles:u # 2.596 GHz + 16,975,944,888 instructions:u # 2.45 insn per cycle + 2.667071155 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3109) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,15 +116,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.999466e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.994359e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.994359e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.995215e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.986676e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.986676e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079536e+00 +- 3.404088e-03 ) GeV^0 -TOTAL : 1.501325 sec - 3,414,229,521 cycles:u # 2.261 GHz - 7,286,054,034 instructions:u # 2.13 insn per cycle - 1.513858812 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2725) (512y: 0) (512z: 0) +TOTAL : 1.503978 sec + 3,418,182,375 cycles:u # 2.259 GHz + 7,285,660,938 instructions:u # 2.13 insn per cycle + 1.516713427 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2723) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -141,15 +141,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.390635e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.061773e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.061773e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.353649e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.068811e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.068811e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079536e+00 +- 3.404088e-03 ) GeV^0 -TOTAL : 1.441434 sec - 3,268,572,057 cycles:u # 2.258 GHz - 6,955,751,508 instructions:u # 2.13 insn per cycle - 1.453728897 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2574) (512y: 12) (512z: 0) +TOTAL : 1.448285 sec + 3,297,392,309 cycles:u # 2.262 GHz + 6,955,358,433 instructions:u # 2.11 insn per cycle + 1.460958768 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2573) (512y: 12) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.454181e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.668889e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.668889e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.431791e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.642331e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.642331e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079536e+00 +- 3.404088e-03 ) GeV^0 -TOTAL : 1.816895 sec - 3,175,859,115 cycles:u # 1.739 GHz - 5,057,438,301 instructions:u # 1.59 insn per cycle - 1.829216577 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 22) (512z: 1840) +TOTAL : 1.825370 sec + 3,190,632,632 cycles:u # 1.739 GHz + 5,057,110,724 instructions:u # 1.58 insn per cycle + 1.838092333 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1708) (512y: 22) (512z: 1840) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index b3412fd0f4..b69c708ac2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_05:03:44 +DATE: 2022-12-18_09:15:19 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.050051e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.224892e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.675562e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.046328e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.236003e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.693006e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.527098 sec - 240,356,611 cycles:u # 0.318 GHz - 630,441,508 instructions:u # 2.62 insn per cycle - 0.814450492 seconds time elapsed +TOTAL : 0.527580 sec + 240,779,328 cycles:u # 0.318 GHz + 630,680,578 instructions:u # 2.62 insn per cycle + 0.815531962 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 119 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.744097e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.821409e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.821409e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.744780e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.822348e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.822348e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086785e+00 +- 3.413819e-03 ) GeV^0 -TOTAL : 6.121087 sec - 16,263,330,096 cycles:u # 2.653 GHz - 45,491,161,112 instructions:u # 2.80 insn per cycle - 6.134883603 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 616) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.119274 sec + 16,272,420,599 cycles:u # 2.655 GHz + 45,492,210,248 instructions:u # 2.80 insn per cycle + 6.132110906 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -91,14 +91,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.265044e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.783761e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.783761e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.263915e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.784561e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.784561e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086786e+00 +- 3.413831e-03 ) GeV^0 -TOTAL : 2.567757 sec - 6,772,083,899 cycles:u # 2.627 GHz - 16,968,562,292 instructions:u # 2.51 insn per cycle - 2.580431186 seconds time elapsed +TOTAL : 2.568972 sec + 6,771,850,909 cycles:u # 2.626 GHz + 16,967,776,098 instructions:u # 2.51 insn per cycle + 2.582025932 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3109) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,15 +116,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.998304e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.000630e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.000630e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.011733e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.001519e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001519e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.419267 sec - 3,283,748,871 cycles:u # 2.298 GHz - 7,306,233,232 instructions:u # 2.22 insn per cycle - 1.432272016 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2725) (512y: 0) (512z: 0) +TOTAL : 1.417351 sec + 3,281,103,998 cycles:u # 2.299 GHz + 7,305,839,853 instructions:u # 2.23 insn per cycle + 1.430043536 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2723) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -141,15 +141,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.362778e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.065522e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.065522e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.430718e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.071153e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.071153e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.363141 sec - 3,156,834,372 cycles:u # 2.300 GHz - 7,009,484,837 instructions:u # 2.22 insn per cycle - 1.375807704 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2574) (512y: 12) (512z: 0) +TOTAL : 1.351332 sec + 3,132,581,935 cycles:u # 2.301 GHz + 7,009,091,615 instructions:u # 2.24 insn per cycle + 1.364303261 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2573) (512y: 12) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.439813e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.659819e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.659819e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.432237e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.643537e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.643537e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086782e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.736641 sec - 3,049,041,642 cycles:u # 1.746 GHz - 5,111,171,674 instructions:u # 1.68 insn per cycle - 1.749226011 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 22) (512z: 1840) +TOTAL : 1.739174 sec + 3,049,919,121 cycles:u # 1.745 GHz + 5,110,844,139 instructions:u # 1.68 insn per cycle + 1.752212917 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1708) (512y: 22) (512z: 1840) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index b2ac16ea26..e878fc75ad 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_05:00:10 +DATE: 2022-12-18_09:11:45 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.697049e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.200956e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.656756e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.649662e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.235443e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.691767e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086808e+00 +- 3.414087e-03 ) GeV^0 -TOTAL : 0.673060 sec - 626,896,494 cycles:u # 0.695 GHz - 1,204,230,412 instructions:u # 1.92 insn per cycle - 0.960328674 seconds time elapsed +TOTAL : 0.673952 sec + 630,885,460 cycles:u # 0.697 GHz + 1,204,748,554 instructions:u # 1.91 insn per cycle + 0.963272337 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 119 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -67,15 +67,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.745154e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.822479e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.822479e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.744371e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.821811e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821811e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086785e+00 +- 3.413819e-03 ) GeV^0 -TOTAL : 6.116627 sec - 16,257,395,632 cycles:u # 2.654 GHz - 45,491,161,527 instructions:u # 2.80 insn per cycle - 6.129357340 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 616) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.120424 sec + 16,274,664,093 cycles:u # 2.655 GHz + 45,492,210,108 instructions:u # 2.80 insn per cycle + 6.133314011 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -92,14 +92,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.257995e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.775904e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.775904e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.269472e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.790164e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.790164e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086786e+00 +- 3.413831e-03 ) GeV^0 -TOTAL : 2.572021 sec - 6,777,875,773 cycles:u # 2.625 GHz - 16,968,562,642 instructions:u # 2.50 insn per cycle - 2.584828528 seconds time elapsed +TOTAL : 2.565532 sec + 6,769,150,398 cycles:u # 2.628 GHz + 16,967,776,399 instructions:u # 2.51 insn per cycle + 2.578146923 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3109) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -117,15 +117,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.989132e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.000290e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.000290e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.009111e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.001820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001820e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.420688 sec - 3,286,945,655 cycles:u # 2.299 GHz - 7,306,233,253 instructions:u # 2.22 insn per cycle - 1.433295549 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2725) (512y: 0) (512z: 0) +TOTAL : 1.417213 sec + 3,282,238,480 cycles:u # 2.300 GHz + 7,305,839,927 instructions:u # 2.23 insn per cycle + 1.429950683 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2723) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -142,15 +142,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.436515e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.068544e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.068544e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.436887e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.068255e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.068255e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.350209 sec - 3,128,914,184 cycles:u # 2.301 GHz - 7,009,485,208 instructions:u # 2.24 insn per cycle - 1.362617495 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2574) (512y: 12) (512z: 0) +TOTAL : 1.350450 sec + 3,130,230,703 cycles:u # 2.301 GHz + 7,009,091,791 instructions:u # 2.24 insn per cycle + 1.363309769 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2573) (512y: 12) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -167,15 +167,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.451217e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.666002e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.666002e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.463440e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.684417e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.684417e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086782e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.733687 sec - 3,042,218,471 cycles:u # 1.745 GHz - 5,111,171,611 instructions:u # 1.68 insn per cycle - 1.746073148 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 22) (512z: 1840) +TOTAL : 1.731064 sec + 3,040,533,140 cycles:u # 1.746 GHz + 5,110,844,188 instructions:u # 1.68 insn per cycle + 1.743928459 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1708) (512y: 22) (512z: 1840) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index ee14d5f84b..0da601c7f5 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_04:17:09 +DATE: 2022-12-18_08:29:05 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.516190e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.698748e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.028920e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.526455e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.701652e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.031794e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.494399 sec - 135,627,152 cycles:u # 0.182 GHz - 210,142,307 instructions:u # 1.55 insn per cycle - 0.906343239 seconds time elapsed +TOTAL : 0.494407 sec + 136,852,492 cycles:u # 0.185 GHz + 210,401,894 instructions:u # 1.54 insn per cycle + 1.175549175 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.776256e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.857019e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.857019e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.772647e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.853513e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.853513e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086785e+00 +- 3.413819e-03 ) GeV^0 -TOTAL : 6.013245 sec - 15,976,854,090 cycles:u # 2.652 GHz - 44,718,606,130 instructions:u # 2.80 insn per cycle - 6.050066847 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 566) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.024644 sec + 16,014,469,679 cycles:u # 2.654 GHz + 44,719,654,505 instructions:u # 2.79 insn per cycle + 6.072084710 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -91,14 +91,14 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.109421e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.877470e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.877470e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.127001e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.897440e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.897440e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086786e+00 +- 3.413831e-03 ) GeV^0 -TOTAL : 2.162983 sec - 5,681,674,369 cycles:u # 2.614 GHz - 16,140,028,459 instructions:u # 2.84 insn per cycle - 2.236443125 seconds time elapsed +TOTAL : 2.155465 sec + 5,670,663,561 cycles:u # 2.619 GHz + 16,139,242,111 instructions:u # 2.85 insn per cycle + 2.170279693 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2845) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe @@ -116,15 +116,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.692919e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.635024e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.635024e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.704044e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.647478e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.647478e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.951264 sec - 4,490,952,021 cycles:u # 2.288 GHz - 8,989,944,771 instructions:u # 2.00 insn per cycle - 2.008229569 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3292) (512y: 0) (512z: 0) +TOTAL : 1.956335 sec + 4,485,495,475 cycles:u # 2.291 GHz + 8,989,551,370 instructions:u # 2.00 insn per cycle + 1.988753832 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3290) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -141,15 +141,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.815792e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.806770e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.806770e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.827560e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.816582e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.816582e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.911817 sec - 4,400,150,932 cycles:u # 2.288 GHz - 8,701,584,589 instructions:u # 1.98 insn per cycle - 1.963503726 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3188) (512y: 5) (512z: 0) +TOTAL : 1.908404 sec + 4,397,256,115 cycles:u # 2.292 GHz + 8,701,191,458 instructions:u # 1.98 insn per cycle + 1.924132025 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3187) (512y: 5) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.736727e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.363919e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.363919e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.745326e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.376829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.376829e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086782e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 2.323075 sec - 3,968,391,843 cycles:u # 1.701 GHz - 6,988,798,548 instructions:u # 1.76 insn per cycle - 2.387003250 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2320) (512y: 12) (512z: 2176) +TOTAL : 2.318631 sec + 3,963,244,286 cycles:u # 1.701 GHz + 6,988,470,850 instructions:u # 1.76 insn per cycle + 2.333917643 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2317) (512y: 12) (512z: 2176) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index f64d1693c9..42e8efc8ae 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_04:43:11 +DATE: 2022-12-18_08:54:48 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.735230e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.192526e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.661826e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.805587e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.208358e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.709327e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.496553 sec - 136,991,947 cycles:u # 0.186 GHz - 210,687,635 instructions:u # 1.54 insn per cycle - 0.793611614 seconds time elapsed +TOTAL : 0.495714 sec + 136,607,915 cycles:u # 0.185 GHz + 209,836,975 instructions:u # 1.54 insn per cycle + 0.797803343 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 119 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.272549e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.407065e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.407065e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.274555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.408945e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.408945e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086785e+00 +- 3.413819e-03 ) GeV^0 -TOTAL : 4.726245 sec - 12,528,392,418 cycles:u # 2.645 GHz - 34,949,656,246 instructions:u # 2.79 insn per cycle - 4.740333709 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.721250 sec + 12,527,666,745 cycles:u # 2.648 GHz + 34,948,607,680 instructions:u # 2.79 insn per cycle + 4.734925995 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 697) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -91,15 +91,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.948838e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.670635e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.670635e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.944792e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.664146e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.664146e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086786e+00 +- 3.413831e-03 ) GeV^0 -TOTAL : 2.230036 sec - 5,859,501,371 cycles:u # 2.615 GHz - 14,013,886,918 instructions:u # 2.39 insn per cycle - 2.244084761 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3014) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.232498 sec + 5,865,487,614 cycles:u # 2.616 GHz + 14,012,838,215 instructions:u # 2.39 insn per cycle + 2.247546244 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3011) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +116,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.846430e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.266388e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.266388e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.838104e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.249128e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.249128e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.642516 sec - 3,786,748,935 cycles:u # 2.290 GHz - 7,856,459,147 instructions:u # 2.07 insn per cycle - 1.656736216 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3645) (512y: 0) (512z: 0) +TOTAL : 1.643834 sec + 3,792,047,376 cycles:u # 2.292 GHz + 7,856,066,058 instructions:u # 2.07 insn per cycle + 1.657709184 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3643) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -141,15 +141,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.910833e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.357885e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.357885e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.953767e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.420063e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.420063e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.627442 sec - 3,751,531,035 cycles:u # 2.290 GHz - 7,425,359,404 instructions:u # 1.98 insn per cycle - 1.641772394 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3416) (512y: 0) (512z: 0) +TOTAL : 1.618629 sec + 3,735,471,517 cycles:u # 2.292 GHz + 7,424,965,869 instructions:u # 1.99 insn per cycle + 1.632766254 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3415) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.611654e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.517117e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.517117e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.614918e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.525557e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.525557e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086782e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.979538 sec - 3,428,429,330 cycles:u # 1.723 GHz - 6,354,267,723 instructions:u # 1.85 insn per cycle - 1.993571179 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3536) (512y: 0) (512z: 2006) +TOTAL : 1.978766 sec + 3,425,895,102 cycles:u # 1.722 GHz + 6,353,808,974 instructions:u # 1.85 insn per cycle + 1.992906127 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3531) (512y: 0) (512z: 2006) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index a94376a005..fbe368a1f6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2022-12-17_04:43:37 +DATE: 2022-12-18_08:55:14 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.364225e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.708901e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.036399e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.285105e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.702407e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.030715e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.495445 sec - 136,944,288 cycles:u # 0.185 GHz - 209,997,398 instructions:u # 1.53 insn per cycle - 0.798046639 seconds time elapsed +TOTAL : 0.494716 sec + 136,273,499 cycles:u # 0.184 GHz + 210,711,229 instructions:u # 1.55 insn per cycle + 0.797765481 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -66,15 +66,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.417685e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.570585e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.570585e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.426619e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.580255e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.580255e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086785e+00 +- 3.413819e-03 ) GeV^0 -TOTAL : 4.448441 sec - 11,784,743,746 cycles:u # 2.643 GHz - 35,133,419,476 instructions:u # 2.98 insn per cycle - 4.463072950 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 451) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.432083 sec + 11,753,366,752 cycles:u # 2.646 GHz + 35,132,371,129 instructions:u # 2.99 insn per cycle + 4.445919352 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -91,15 +91,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.914115e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.638966e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.638966e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.922274e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.639873e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.639873e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086786e+00 +- 3.413831e-03 ) GeV^0 -TOTAL : 2.244793 sec - 5,900,270,770 cycles:u # 2.616 GHz - 13,687,588,831 instructions:u # 2.32 insn per cycle - 2.258760526 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2513) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.240928 sec + 5,896,753,528 cycles:u # 2.618 GHz + 13,686,540,107 instructions:u # 2.32 insn per cycle + 2.255102487 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2510) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +116,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.973492e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.450257e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.450257e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.989328e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.472985e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.472985e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.614003 sec - 3,720,913,207 cycles:u # 2.289 GHz - 7,626,394,127 instructions:u # 2.05 insn per cycle - 1.628111340 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2938) (512y: 0) (512z: 0) +TOTAL : 1.610751 sec + 3,718,406,603 cycles:u # 2.292 GHz + 7,626,000,878 instructions:u # 2.05 insn per cycle + 1.624902756 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2936) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -141,15 +141,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.098022e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.636572e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.636572e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.092443e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.623342e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.623342e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086781e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.587113 sec - 3,661,666,670 cycles:u # 2.291 GHz - 7,218,440,861 instructions:u # 1.97 insn per cycle - 1.601218697 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2720) (512y: 2) (512z: 0) +TOTAL : 1.588131 sec + 3,667,620,822 cycles:u # 2.293 GHz + 7,218,047,728 instructions:u # 1.97 insn per cycle + 1.601852676 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2719) (512y: 2) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.794304e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.781598e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.781598e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.780625e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.746072e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.746072e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086782e+00 +- 3.413783e-03 ) GeV^0 -TOTAL : 1.920668 sec - 3,336,944,371 cycles:u # 1.728 GHz - 6,186,837,137 instructions:u # 1.85 insn per cycle - 1.934696362 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2807) (512y: 0) (512z: 1582) +TOTAL : 1.923982 sec + 3,339,481,660 cycles:u # 1.727 GHz + 6,186,378,102 instructions:u # 1.85 insn per cycle + 1.937894222 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2802) (512y: 0) (512z: 1582) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 5c81b374fa..c3b8492b4f 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2022-12-17_04:17:40 +DATE: 2022-12-18_08:29:34 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.589959e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.024878e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.039922e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.619968e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.027376e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.041529e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.497532 sec - 143,744,449 cycles:u # 0.205 GHz - 176,978,802 instructions:u # 1.23 insn per cycle - 0.917627290 seconds time elapsed +TOTAL : 0.484254 sec + 141,329,475 cycles:u # 0.201 GHz + 176,940,887 instructions:u # 1.25 insn per cycle + 0.815944250 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.063814e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.290033e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.303802e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.054105e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.280665e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.293750e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.617909 sec - 208,450,633 cycles:u # 0.239 GHz - 331,679,894 instructions:u # 1.59 insn per cycle - 0.931610684 seconds time elapsed +TOTAL : 0.617658 sec + 206,074,224 cycles:u # 0.237 GHz + 326,072,151 instructions:u # 1.58 insn per cycle + 0.932692977 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -79,15 +79,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.280412e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.305339e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.305339e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.278486e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.303133e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.303133e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 7.207975 sec - 19,255,100,188 cycles:u # 2.669 GHz - 59,082,848,717 instructions:u # 3.07 insn per cycle - 7.266473058 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1169) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.213935 sec + 19,287,542,830 cycles:u # 2.672 GHz + 59,082,357,147 instructions:u # 3.06 insn per cycle + 7.255068599 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1174) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.278717e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.365725e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.365725e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.300162e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.387908e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.387908e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.851704 sec - 10,273,843,663 cycles:u # 2.666 GHz - 30,696,458,363 instructions:u # 2.99 insn per cycle - 3.915361389 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5148) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.859738 sec + 10,242,865,699 cycles:u # 2.669 GHz + 30,696,048,883 instructions:u # 3.00 insn per cycle + 3.885228704 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5145) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.043939e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.356211e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.356211e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.032263e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.352953e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.352953e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.060080 sec - 4,711,452,343 cycles:u # 2.281 GHz - 10,967,759,959 instructions:u # 2.33 insn per cycle - 2.131123205 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4153) (512y: 0) (512z: 0) +TOTAL : 2.095414 sec + 4,723,422,927 cycles:u # 2.283 GHz + 10,967,719,316 instructions:u # 2.32 insn per cycle + 2.125411009 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4148) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.061360e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.453924e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.453924e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.065322e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.456183e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.456183e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.831119 sec - 4,191,230,659 cycles:u # 2.283 GHz - 10,163,065,901 instructions:u # 2.42 insn per cycle - 1.907687826 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3948) (512y: 32) (512z: 0) +TOTAL : 1.849625 sec + 4,192,345,044 cycles:u # 2.284 GHz + 10,163,025,374 instructions:u # 2.42 insn per cycle + 1.874713868 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3943) (512y: 32) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.788585e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.011068e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.011068e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.794575e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.018306e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.018306e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.438404 sec - 3,931,136,522 cycles:u # 1.609 GHz - 5,597,762,946 instructions:u # 1.42 insn per cycle - 2.481080858 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1102) (512y: 62) (512z: 3426) +TOTAL : 2.444555 sec + 3,931,797,172 cycles:u # 1.610 GHz + 5,597,804,691 instructions:u # 1.42 insn per cycle + 2.491129792 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1098) (512y: 62) (512z: 3431) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index dfc9a7b2be..af0844b726 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2022-12-17_04:53:24 +DATE: 2022-12-18_09:05:01 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +47,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.574481e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.048353e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.048353e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.566224e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.871586e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.871586e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.547637 sec - 340,168,511 cycles:u # 0.442 GHz - 564,278,923 instructions:u # 1.66 insn per cycle - 0.828928735 seconds time elapsed +TOTAL : 0.549764 sec + 341,851,669 cycles:u # 0.439 GHz + 565,159,093 instructions:u # 1.65 insn per cycle + 0.837601428 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -66,14 +66,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.555890e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.379713e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.379713e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.550228e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.166649e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.166649e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.980980 sec - 1,043,183,437 cycles:u # 0.838 GHz - 1,723,873,331 instructions:u # 1.65 insn per cycle - 1.306749003 seconds time elapsed +TOTAL : 0.987580 sec + 1,048,452,748 cycles:u # 0.835 GHz + 1,733,863,527 instructions:u # 1.65 insn per cycle + 1.313528355 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -88,15 +88,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.275256e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.300299e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.300299e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.273507e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.298364e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.298364e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 7.234023 sec - 19,316,613,559 cycles:u # 2.668 GHz - 59,096,719,169 instructions:u # 3.06 insn per cycle - 7.242328277 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1169) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.239874 sec + 19,344,716,550 cycles:u # 2.670 GHz + 59,096,227,796 instructions:u # 3.05 insn per cycle + 7.248091724 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1174) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -114,15 +114,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.267495e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.355446e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.355446e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.292754e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.381580e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.381580e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.870474 sec - 10,326,916,612 cycles:u # 2.665 GHz - 30,742,758,214 instructions:u # 2.98 insn per cycle - 3.878650776 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5148) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.847674 sec + 10,275,567,797 cycles:u # 2.667 GHz + 30,742,348,652 instructions:u # 2.99 insn per cycle + 3.855951185 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5145) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -140,15 +140,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.013707e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.329141e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.329141e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.027784e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.343789e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.343789e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.076613 sec - 4,747,306,608 cycles:u # 2.280 GHz - 11,015,092,403 instructions:u # 2.32 insn per cycle - 2.085104215 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4153) (512y: 0) (512z: 0) +TOTAL : 2.073088 sec + 4,744,061,345 cycles:u # 2.283 GHz + 11,015,051,459 instructions:u # 2.32 insn per cycle + 2.081300763 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4148) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.036191e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.431519e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.431519e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.007458e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.406616e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.406616e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.845004 sec - 4,220,572,966 cycles:u # 2.281 GHz - 10,210,398,074 instructions:u # 2.42 insn per cycle - 1.853507420 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3948) (512y: 32) (512z: 0) +TOTAL : 1.851831 sec + 4,238,704,587 cycles:u # 2.283 GHz + 10,210,356,937 instructions:u # 2.41 insn per cycle + 1.860176941 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3943) (512y: 32) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -192,15 +192,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.782247e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.010074e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.010074e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.786303e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.012175e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.012175e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.449422 sec - 3,958,951,267 cycles:u # 1.613 GHz - 5,633,869,916 instructions:u # 1.42 insn per cycle - 2.457902133 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1102) (512y: 62) (512z: 3426) +TOTAL : 2.447694 sec + 3,958,834,258 cycles:u # 1.614 GHz + 5,633,911,029 instructions:u # 1.42 insn per cycle + 2.456044796 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1098) (512y: 62) (512z: 3431) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 018c7aad58..5b20789edc 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2022-12-17_04:18:15 +DATE: 2022-12-18_08:30:08 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.502591e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.011176e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.024845e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.521227e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.018039e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.031236e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.478692 sec - 142,288,629 cycles:u # 0.203 GHz - 178,636,824 instructions:u # 1.26 insn per cycle - 0.885078583 seconds time elapsed +TOTAL : 0.525370 sec + 143,007,272 cycles:u # 0.200 GHz + 179,803,543 instructions:u # 1.26 insn per cycle + 0.818025051 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.054091e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.274940e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.288136e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.057981e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.279752e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.292244e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.620887 sec - 202,649,318 cycles:u # 0.232 GHz - 319,490,193 instructions:u # 1.58 insn per cycle - 0.933726713 seconds time elapsed +TOTAL : 0.615203 sec + 202,685,390 cycles:u # 0.231 GHz + 326,603,446 instructions:u # 1.61 insn per cycle + 0.937025456 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -79,15 +79,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.269266e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.293907e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.293907e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.273678e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.298319e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.298319e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 7.242911 sec - 19,351,101,144 cycles:u # 2.670 GHz - 59,349,482,112 instructions:u # 3.07 insn per cycle - 7.283101769 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1291) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.234403 sec + 19,323,893,170 cycles:u # 2.671 GHz + 59,348,990,450 instructions:u # 3.07 insn per cycle + 7.287342522 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1296) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.332543e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.422720e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.422720e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.347160e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.437199e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.437199e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.803771 sec - 10,158,270,627 cycles:u # 2.667 GHz - 30,346,060,734 instructions:u # 2.99 insn per cycle - 3.919923885 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4984) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.821687 sec + 10,130,587,171 cycles:u # 2.669 GHz + 30,345,651,352 instructions:u # 3.00 insn per cycle + 3.830656650 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4981) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.758507e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.044885e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.044885e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.741957e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.030371e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.030371e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.134318 sec - 4,881,589,794 cycles:u # 2.282 GHz - 11,372,942,550 instructions:u # 2.33 insn per cycle - 2.169949378 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4319) (512y: 0) (512z: 0) +TOTAL : 2.150296 sec + 4,891,470,818 cycles:u # 2.282 GHz + 11,372,901,404 instructions:u # 2.33 insn per cycle + 2.174360767 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4314) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.430761e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.777097e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.777097e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.444008e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.783173e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.783173e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.966369 sec - 4,498,245,324 cycles:u # 2.282 GHz - 10,602,330,116 instructions:u # 2.36 insn per cycle - 2.039063107 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4025) (512y: 186) (512z: 0) +TOTAL : 1.977346 sec + 4,493,022,398 cycles:u # 2.283 GHz + 10,602,289,471 instructions:u # 2.36 insn per cycle + 1.986224232 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4020) (512y: 186) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.745866e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.967471e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.967471e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.740295e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.964997e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.964997e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.453346 sec - 3,956,178,401 cycles:u # 1.609 GHz - 5,821,608,041 instructions:u # 1.47 insn per cycle - 2.489286093 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1065) (512y: 109) (512z: 3494) +TOTAL : 2.455222 sec + 3,954,451,735 cycles:u # 1.608 GHz + 5,821,648,837 instructions:u # 1.47 insn per cycle + 2.485905922 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1061) (512y: 109) (512z: 3499) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index c258b0c59e..3276d6b6f5 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2022-12-17_04:18:51 +DATE: 2022-12-18_08:30:42 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.474962e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.350835e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.455827e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.479968e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.378653e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.492765e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.460546 sec - 127,984,286 cycles:u # 0.187 GHz - 151,003,971 instructions:u # 1.18 insn per cycle - 0.914185516 seconds time elapsed +TOTAL : 0.462496 sec + 126,258,737 cycles:u # 0.184 GHz + 151,025,496 instructions:u # 1.20 insn per cycle + 0.742785380 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.235766e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.463387e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.547282e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.228907e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.461979e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.546437e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.511478 sec - 133,905,530 cycles:u # 0.176 GHz - 196,172,015 instructions:u # 1.47 insn per cycle - 0.819305344 seconds time elapsed +TOTAL : 0.508998 sec + 134,275,441 cycles:u # 0.179 GHz + 196,285,386 instructions:u # 1.46 insn per cycle + 0.808765554 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -79,15 +79,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.327215e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.348075e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.348075e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.332318e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.353076e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.353076e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009130e+02 +- 5.002690e+01 ) GeV^-2 -TOTAL : 7.060990 sec - 18,863,141,836 cycles:u # 2.670 GHz - 59,516,812,699 instructions:u # 3.16 insn per cycle - 7.106425028 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 952) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.045624 sec + 18,841,764,504 cycles:u # 2.673 GHz + 59,516,812,917 instructions:u # 3.16 insn per cycle + 7.053070779 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 957) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.700410e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.934373e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.934373e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.701186e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.940069e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.940069e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009131e+02 +- 5.002691e+01 ) GeV^-2 -TOTAL : 2.147459 sec - 5,734,792,781 cycles:u # 2.665 GHz - 16,570,823,673 instructions:u # 2.89 insn per cycle - 2.339299886 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5854) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.147531 sec + 5,738,137,865 cycles:u # 2.666 GHz + 16,570,700,907 instructions:u # 2.89 insn per cycle + 2.155154969 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5851) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.574065e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.672357e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.672357e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.571613e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.669094e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.669094e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008906e+02 +- 5.002663e+01 ) GeV^-2 -TOTAL : 1.061445 sec - 2,439,706,658 cycles:u # 2.288 GHz - 5,831,457,536 instructions:u # 2.39 insn per cycle - 1.101972065 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4397) (512y: 0) (512z: 0) +TOTAL : 1.062998 sec + 2,443,822,044 cycles:u # 2.289 GHz + 5,831,375,854 instructions:u # 2.39 insn per cycle + 1.070674387 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4378) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.747212e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.870490e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.870490e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.747656e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.870715e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.870715e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008906e+02 +- 5.002663e+01 ) GeV^-2 -TOTAL : 0.958239 sec - 2,203,452,293 cycles:u # 2.289 GHz - 5,401,892,737 instructions:u # 2.45 insn per cycle - 1.042028714 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4188) (512y: 25) (512z: 0) +TOTAL : 0.957943 sec + 2,203,827,291 cycles:u # 2.290 GHz + 5,401,647,187 instructions:u # 2.45 insn per cycle + 0.965398602 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4143) (512y: 25) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.376177e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.454768e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.454768e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.375599e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.455499e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.455499e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008902e+02 +- 5.002664e+01 ) GeV^-2 -TOTAL : 1.212592 sec - 1,985,176,366 cycles:u # 1.631 GHz - 3,070,628,124 instructions:u # 1.55 insn per cycle - 1.279069975 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1396) (512y: 33) (512z: 3544) +TOTAL : 1.213090 sec + 1,988,761,988 cycles:u # 1.633 GHz + 3,070,638,068 instructions:u # 1.54 insn per cycle + 1.220726639 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1392) (512y: 33) (512z: 3549) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index ea8989b0ca..e5f9b229af 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2022-12-17_04:53:57 +DATE: 2022-12-18_09:05:35 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +47,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.849046e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.175390e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.175390e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.826852e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.099215e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.099215e+07 ) sec^-1 MeanMatrixElemValue = ( 1.009024e+02 +- 5.001174e+01 ) GeV^-2 -TOTAL : 0.482600 sec - 196,651,648 cycles:u # 0.281 GHz - 362,894,409 instructions:u # 1.85 insn per cycle - 0.759632337 seconds time elapsed +TOTAL : 0.481466 sec + 196,296,749 cycles:u # 0.280 GHz + 363,241,616 instructions:u # 1.85 insn per cycle + 0.759041802 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -66,14 +66,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.697463e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.520269e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.520269e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.693018e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.507058e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.507058e+07 ) sec^-1 MeanMatrixElemValue = ( 6.681599e+02 +- 4.773210e+02 ) GeV^-2 -TOTAL : 0.685822 sec - 540,818,293 cycles:u # 0.575 GHz - 1,020,115,106 instructions:u # 1.89 insn per cycle - 1.000318369 seconds time elapsed +TOTAL : 0.686379 sec + 540,036,178 cycles:u # 0.575 GHz + 1,020,347,683 instructions:u # 1.89 insn per cycle + 0.998565835 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -88,15 +88,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.325281e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.346097e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.346097e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.329533e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.350363e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.350363e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009130e+02 +- 5.002690e+01 ) GeV^-2 -TOTAL : 7.071794 sec - 18,883,162,822 cycles:u # 2.669 GHz - 59,524,721,025 instructions:u # 3.15 insn per cycle - 7.079552680 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 952) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.058520 sec + 18,872,710,821 cycles:u # 2.672 GHz + 59,524,721,129 instructions:u # 3.15 insn per cycle + 7.066054141 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 957) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -114,15 +114,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.678713e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.923252e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.923252e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.677878e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.912039e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.912039e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009131e+02 +- 5.002691e+01 ) GeV^-2 -TOTAL : 2.158963 sec - 5,761,665,163 cycles:u # 2.663 GHz - 16,617,269,268 instructions:u # 2.88 insn per cycle - 2.166662178 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5854) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.158606 sec + 5,750,314,134 cycles:u # 2.662 GHz + 16,617,146,430 instructions:u # 2.89 insn per cycle + 2.166319614 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5851) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -140,15 +140,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.569059e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.667338e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.667338e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.567111e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.664843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.664843e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008906e+02 +- 5.002663e+01 ) GeV^-2 -TOTAL : 1.069486 sec - 2,456,223,770 cycles:u # 2.286 GHz - 5,866,677,756 instructions:u # 2.39 insn per cycle - 1.077374218 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4397) (512y: 0) (512z: 0) +TOTAL : 1.070623 sec + 2,461,704,247 cycles:u # 2.288 GHz + 5,866,595,541 instructions:u # 2.38 insn per cycle + 1.078625290 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4378) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.737556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.865632e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.865632e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.745153e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.867646e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.867646e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008906e+02 +- 5.002663e+01 ) GeV^-2 -TOTAL : 0.968542 sec - 2,228,286,221 cycles:u # 2.289 GHz - 5,437,112,601 instructions:u # 2.44 insn per cycle - 0.976350399 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4188) (512y: 25) (512z: 0) +TOTAL : 0.963676 sec + 2,216,869,067 cycles:u # 2.289 GHz + 5,436,866,716 instructions:u # 2.45 insn per cycle + 0.971396560 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4143) (512y: 25) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -192,15 +192,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.371581e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.449942e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.449942e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.373743e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.451067e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.451067e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008902e+02 +- 5.002664e+01 ) GeV^-2 -TOTAL : 1.221433 sec - 2,002,703,718 cycles:u # 1.634 GHz - 3,110,450,362 instructions:u # 1.55 insn per cycle - 1.229533003 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1396) (512y: 33) (512z: 3544) +TOTAL : 1.219698 sec + 2,003,686,500 cycles:u # 1.636 GHz + 3,110,460,203 instructions:u # 1.55 insn per cycle + 1.227466074 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1392) (512y: 33) (512z: 3549) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 1835a4efca..eb5db28733 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2022-12-17_04:19:21 +DATE: 2022-12-18_08:31:10 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.501839e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.390528e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.493679e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.483062e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.368599e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.487186e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.463765 sec - 126,530,470 cycles:u # 0.185 GHz - 150,076,415 instructions:u # 1.19 insn per cycle - 0.893992525 seconds time elapsed +TOTAL : 0.460367 sec + 124,961,572 cycles:u # 0.183 GHz + 150,603,421 instructions:u # 1.21 insn per cycle + 0.740734798 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.237066e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.467397e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.551189e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.164193e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.331142e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.410963e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.510707 sec - 134,198,864 cycles:u # 0.177 GHz - 197,434,466 instructions:u # 1.47 insn per cycle - 0.818268531 seconds time elapsed +TOTAL : 0.594692 sec + 135,305,968 cycles:u # 0.162 GHz + 197,725,400 instructions:u # 1.46 insn per cycle + 0.894550923 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -79,15 +79,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.333909e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.354793e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.354793e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.337772e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.359004e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359004e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009130e+02 +- 5.002690e+01 ) GeV^-2 -TOTAL : 7.040322 sec - 18,812,690,568 cycles:u # 2.671 GHz - 59,280,070,508 instructions:u # 3.15 insn per cycle - 7.119705526 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1013) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.028683 sec + 18,797,328,021 cycles:u # 2.673 GHz + 59,280,070,618 instructions:u # 3.15 insn per cycle + 7.036028803 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1018) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.073540e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.330466e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.330466e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.081884e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.344567e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.344567e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009131e+02 +- 5.002691e+01 ) GeV^-2 -TOTAL : 2.048618 sec - 5,468,999,164 cycles:u # 2.664 GHz - 16,378,266,326 instructions:u # 2.99 insn per cycle - 2.196820417 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5640) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.046479 sec + 5,470,117,728 cycles:u # 2.667 GHz + 16,378,143,101 instructions:u # 2.99 insn per cycle + 2.054233862 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5637) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.360430e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.433250e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.433250e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.363707e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.436787e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.436787e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008906e+02 +- 5.002663e+01 ) GeV^-2 -TOTAL : 1.224461 sec - 2,808,310,100 cycles:u # 2.285 GHz - 6,394,044,222 instructions:u # 2.28 insn per cycle - 1.255134558 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5043) (512y: 0) (512z: 0) +TOTAL : 1.221608 sec + 2,805,887,505 cycles:u # 2.288 GHz + 6,393,962,551 instructions:u # 2.28 insn per cycle + 1.229420256 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5024) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.466018e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.551401e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.551401e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.470665e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.555877e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.555877e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008906e+02 +- 5.002663e+01 ) GeV^-2 -TOTAL : 1.137884 sec - 2,612,805,827 cycles:u # 2.286 GHz - 5,948,748,548 instructions:u # 2.28 insn per cycle - 1.213387817 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4855) (512y: 18) (512z: 0) +TOTAL : 1.133971 sec + 2,607,712,292 cycles:u # 2.290 GHz + 5,948,502,986 instructions:u # 2.28 insn per cycle + 1.141630793 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4810) (512y: 18) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [h Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.269010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.338792e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.338792e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.265283e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.334324e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.334324e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008902e+02 +- 5.002664e+01 ) GeV^-2 -TOTAL : 1.312882 sec - 2,139,144,625 cycles:u # 1.624 GHz - 3,367,265,948 instructions:u # 1.57 insn per cycle - 1.356406898 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1685) (512y: 31) (512z: 3735) +TOTAL : 1.316879 sec + 2,140,619,769 cycles:u # 1.622 GHz + 3,367,276,016 instructions:u # 1.57 insn per cycle + 1.324862828 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1681) (512y: 31) (512z: 3740) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 78593b1780..1dbbae938a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_04:19:51 +DATE: 2022-12-18_08:31:38 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.466556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.495130e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497449e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.425406e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.454651e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.457082e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.557587 sec - 177,149,974 cycles:u # 0.227 GHz - 271,577,305 instructions:u # 1.53 insn per cycle - 0.993915347 seconds time elapsed +TOTAL : 0.539622 sec + 177,744,318 cycles:u # 0.228 GHz + 273,873,545 instructions:u # 1.54 insn per cycle + 0.836698232 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.128313e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.162570e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.164049e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.123481e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.156613e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.157970e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.067918 sec - 2,308,041,983 cycles:u # 0.691 GHz - 4,733,327,830 instructions:u # 2.05 insn per cycle - 3.398106584 seconds time elapsed +TOTAL : 3.063189 sec + 2,372,314,494 cycles:u # 0.712 GHz + 4,780,115,139 instructions:u # 2.01 insn per cycle + 3.391283725 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.737391e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.739212e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.739212e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.730329e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.732127e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.732127e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.447087 sec - 25,225,682,781 cycles:u # 2.669 GHz - 78,683,974,722 instructions:u # 3.12 insn per cycle - 9.532773345 seconds time elapsed +TOTAL : 9.492253 sec + 25,365,414,797 cycles:u # 2.673 GHz + 78,685,858,526 instructions:u # 3.10 insn per cycle + 9.500262688 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4794) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.402957e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.409956e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.409956e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.400584e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.407684e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.407684e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.826823 sec - 12,886,714,702 cycles:u # 2.667 GHz - 39,208,189,638 instructions:u # 3.04 insn per cycle - 4.933024088 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13081) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.830398 sec + 12,907,197,410 cycles:u # 2.669 GHz + 39,208,934,975 instructions:u # 3.04 insn per cycle + 4.839501256 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13087) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.863734e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.891952e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.891952e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.776094e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.803531e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.803531e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.396914 sec - 5,447,270,744 cycles:u # 2.268 GHz - 13,805,008,025 instructions:u # 2.53 insn per cycle - 2.459670905 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10947) (512y: 0) (512z: 0) +TOTAL : 2.428013 sec + 5,521,549,631 cycles:u # 2.270 GHz + 13,805,380,563 instructions:u # 2.50 insn per cycle + 2.435996981 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10955) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.716222e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.753118e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.753118e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.725262e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.761186e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.761186e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.133013 sec - 4,845,877,903 cycles:u # 2.267 GHz - 12,450,829,039 instructions:u # 2.57 insn per cycle - 2.179167458 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10638) (512y: 29) (512z: 0) +TOTAL : 2.130581 sec + 4,844,454,491 cycles:u # 2.269 GHz + 12,451,202,075 instructions:u # 2.57 insn per cycle + 2.138350041 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10646) (512y: 29) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.442126e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.466831e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.466831e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.443476e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.468126e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.468126e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.553548 sec - 4,033,612,911 cycles:u # 1.577 GHz - 6,339,398,656 instructions:u # 1.57 insn per cycle - 2.627750352 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1361) (512y: 69) (512z: 9961) +TOTAL : 2.553112 sec + 4,034,043,720 cycles:u # 1.577 GHz + 6,339,584,780 instructions:u # 1.57 insn per cycle + 2.560774984 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1363) (512y: 69) (512z: 9967) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index dcf2f323d3..534566ac20 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_04:54:26 +DATE: 2022-12-18_09:06:03 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +47,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.805271e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.448306e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.448306e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.802500e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.443994e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.443994e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.533096 sec - 173,049,146 cycles:u # 0.222 GHz - 283,235,768 instructions:u # 1.64 insn per cycle - 0.838269294 seconds time elapsed +TOTAL : 0.536534 sec + 175,793,236 cycles:u # 0.228 GHz + 281,471,755 instructions:u # 1.60 insn per cycle + 0.832298175 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -66,14 +66,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.213778e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.106804e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.106804e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.226931e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.131427e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.131427e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.533080 sec - 3,450,713,899 cycles:u # 0.903 GHz - 6,470,863,490 instructions:u # 1.88 insn per cycle - 3.884156847 seconds time elapsed +TOTAL : 3.527453 sec + 3,408,709,802 cycles:u # 0.893 GHz + 6,579,105,733 instructions:u # 1.93 insn per cycle + 3.873613277 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -88,14 +88,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.739498e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.741334e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.741334e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.730792e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.732601e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.732601e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.440831 sec - 25,216,325,202 cycles:u # 2.670 GHz - 78,687,477,914 instructions:u # 3.12 insn per cycle - 9.449427734 seconds time elapsed +TOTAL : 9.488056 sec + 25,363,347,678 cycles:u # 2.672 GHz + 78,689,362,558 instructions:u # 3.10 insn per cycle + 9.496629760 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4794) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -114,15 +114,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.396499e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.403675e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.403675e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.403055e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.410134e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.410134e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.841204 sec - 12,920,118,459 cycles:u # 2.666 GHz - 39,217,667,359 instructions:u # 3.04 insn per cycle - 4.849693629 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13081) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.831841 sec + 12,892,543,349 cycles:u # 2.667 GHz + 39,218,412,924 instructions:u # 3.04 insn per cycle + 4.840077350 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13087) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -140,15 +140,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.851513e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.880023e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.880023e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.768762e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.796256e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.796256e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.406276 sec - 5,461,767,195 cycles:u # 2.264 GHz - 13,811,949,909 instructions:u # 2.53 insn per cycle - 2.415625685 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10947) (512y: 0) (512z: 0) +TOTAL : 2.435578 sec + 5,528,683,298 cycles:u # 2.265 GHz + 13,812,322,890 instructions:u # 2.50 insn per cycle + 2.443930462 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10955) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.710441e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.746251e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.746251e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.736878e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.772874e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.772874e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.139606 sec - 4,852,208,808 cycles:u # 2.264 GHz - 12,457,771,420 instructions:u # 2.57 insn per cycle - 2.148093920 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10638) (512y: 29) (512z: 0) +TOTAL : 2.132459 sec + 4,844,615,868 cycles:u # 2.266 GHz + 12,458,143,812 instructions:u # 2.57 insn per cycle + 2.140931310 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10646) (512y: 29) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -192,15 +192,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.436746e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.461841e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.461841e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.437359e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.462512e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.462512e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.560848 sec - 4,042,757,707 cycles:u # 1.576 GHz - 6,346,928,753 instructions:u # 1.57 insn per cycle - 2.569358271 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1361) (512y: 69) (512z: 9961) +TOTAL : 2.560784 sec + 4,044,818,594 cycles:u # 1.576 GHz + 6,347,114,797 instructions:u # 1.57 insn per cycle + 2.569361167 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1363) (512y: 69) (512z: 9967) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index eec2e299db..1351197484 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_05:07:52 +DATE: 2022-12-18_09:19:27 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.443204e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.470833e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.472983e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.426641e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.454644e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.456884e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.378368 sec - 147,455,615 cycles:u # 0.297 GHz - 244,985,610 instructions:u # 1.66 insn per cycle - 0.674365779 seconds time elapsed +TOTAL : 0.521628 sec + 149,841,494 cycles:u # 0.198 GHz + 241,047,307 instructions:u # 1.61 insn per cycle + 0.817820803 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.121456e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.150800e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.152074e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.144035e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.176966e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.178370e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.198635 sec - 2,571,026,019 cycles:u # 0.742 GHz - 5,161,923,330 instructions:u # 2.01 insn per cycle - 3.520474152 seconds time elapsed +TOTAL : 3.194140 sec + 2,561,192,194 cycles:u # 0.739 GHz + 4,948,394,545 instructions:u # 1.93 insn per cycle + 3.525213827 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.736474e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.738288e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.738288e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.732695e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.734505e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.734505e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 9.453630 sec - 25,234,910,591 cycles:u # 2.668 GHz - 78,683,446,615 instructions:u # 3.12 insn per cycle - 9.461681641 seconds time elapsed +TOTAL : 9.474502 sec + 25,327,962,887 cycles:u # 2.672 GHz + 78,685,331,290 instructions:u # 3.11 insn per cycle + 9.482364773 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4794) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.403474e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.410444e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.410444e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.403102e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.410127e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.410127e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.827801 sec - 12,894,500,188 cycles:u # 2.669 GHz - 39,207,134,289 instructions:u # 3.04 insn per cycle - 4.835645299 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13081) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.828461 sec + 12,892,186,886 cycles:u # 2.669 GHz + 39,207,879,538 instructions:u # 3.04 insn per cycle + 4.837451993 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13087) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.861899e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.889988e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.889988e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.766790e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.794431e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.794431e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.399422 sec - 5,453,989,433 cycles:u # 2.268 GHz - 13,803,424,718 instructions:u # 2.53 insn per cycle - 2.407433228 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10947) (512y: 0) (512z: 0) +TOTAL : 2.433205 sec + 5,530,623,173 cycles:u # 2.269 GHz + 13,803,797,339 instructions:u # 2.50 insn per cycle + 2.440904210 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10955) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.721164e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.756927e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.756927e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.533867e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.567762e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.567762e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.133532 sec - 4,850,863,277 cycles:u # 2.268 GHz - 12,448,198,015 instructions:u # 2.57 insn per cycle - 2.141370180 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10638) (512y: 29) (512z: 0) +TOTAL : 2.187055 sec + 4,967,299,679 cycles:u # 2.267 GHz + 12,448,570,867 instructions:u # 2.51 insn per cycle + 2.194669229 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10646) (512y: 29) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.445015e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.470085e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.470085e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.444184e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.468983e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.468983e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.554357 sec - 4,034,734,857 cycles:u # 1.577 GHz - 6,336,767,015 instructions:u # 1.57 insn per cycle - 2.561931805 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1361) (512y: 69) (512z: 9961) +TOTAL : 2.554620 sec + 4,037,039,872 cycles:u # 1.578 GHz + 6,336,953,616 instructions:u # 1.57 insn per cycle + 2.562423042 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1363) (512y: 69) (512z: 9967) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index c9e3398f5f..c08ed9504e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_05:04:11 +DATE: 2022-12-18_09:15:47 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.450584e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.478416e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.481114e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.455868e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.483654e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.486328e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.522268 sec - 141,132,614 cycles:u # 0.187 GHz - 240,760,514 instructions:u # 1.71 insn per cycle - 0.818435951 seconds time elapsed +TOTAL : 0.520613 sec + 144,390,260 cycles:u # 0.191 GHz + 242,827,967 instructions:u # 1.68 insn per cycle + 0.816434008 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.129273e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.160521e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.161875e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.154755e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.187368e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.188869e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.102592 sec - 2,454,973,937 cycles:u # 0.729 GHz - 5,172,562,177 instructions:u # 2.11 insn per cycle - 3.424989769 seconds time elapsed +TOTAL : 3.091822 sec + 2,427,526,042 cycles:u # 0.721 GHz + 5,191,308,037 instructions:u # 2.14 insn per cycle + 3.423634785 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.739052e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.740864e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.740864e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.730859e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.732658e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.732658e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.437874 sec - 25,216,235,541 cycles:u # 2.671 GHz - 78,683,975,358 instructions:u # 3.12 insn per cycle - 9.445399087 seconds time elapsed +TOTAL : 9.482623 sec + 25,350,492,077 cycles:u # 2.672 GHz + 78,685,859,519 instructions:u # 3.10 insn per cycle + 9.490321081 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4794) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.405246e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.412215e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.412215e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.407034e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.414034e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.414034e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.823430 sec - 12,885,110,654 cycles:u # 2.669 GHz - 39,208,190,665 instructions:u # 3.04 insn per cycle - 4.831106095 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13081) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.820796 sec + 12,889,820,113 cycles:u # 2.671 GHz + 39,208,936,339 instructions:u # 3.04 insn per cycle + 4.828267509 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13087) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.864227e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.892604e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.892604e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.781039e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.808366e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.808366e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.396654 sec - 5,448,837,101 cycles:u # 2.269 GHz - 13,805,009,063 instructions:u # 2.53 insn per cycle - 2.404140418 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10947) (512y: 0) (512z: 0) +TOTAL : 2.425927 sec + 5,520,458,811 cycles:u # 2.271 GHz + 13,805,381,340 instructions:u # 2.50 insn per cycle + 2.433850542 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10955) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.722950e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.758975e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.758975e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.733434e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.768994e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.768994e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.130980 sec - 4,843,472,321 cycles:u # 2.268 GHz - 12,450,829,936 instructions:u # 2.57 insn per cycle - 2.138715755 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10638) (512y: 29) (512z: 0) +TOTAL : 2.128233 sec + 4,841,204,243 cycles:u # 2.270 GHz + 12,451,202,674 instructions:u # 2.57 insn per cycle + 2.135865487 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10646) (512y: 29) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.437390e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.462169e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.462169e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.439663e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.464336e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.464336e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.555320 sec - 4,034,247,785 cycles:u # 1.576 GHz - 6,339,399,550 instructions:u # 1.57 insn per cycle - 2.562810795 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1361) (512y: 69) (512z: 9961) +TOTAL : 2.554526 sec + 4,034,389,096 cycles:u # 1.577 GHz + 6,339,586,348 instructions:u # 1.57 insn per cycle + 2.562555200 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1363) (512y: 69) (512z: 9967) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 2831ea8249..6ccc497ea9 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_05:00:37 +DATE: 2022-12-18_09:12:13 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.841055e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.469171e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.471512e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.838873e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.454895e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.457293e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.529626 sec - 169,893,045 cycles:u # 0.221 GHz - 280,620,159 instructions:u # 1.65 insn per cycle - 0.828975834 seconds time elapsed +TOTAL : 0.529685 sec + 168,675,920 cycles:u # 0.219 GHz + 279,933,352 instructions:u # 1.66 insn per cycle + 0.832673842 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -60,14 +60,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.271875e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.139700e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.141026e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.300421e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.198708e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.200100e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.426253 sec - 3,285,893,891 cycles:u # 0.890 GHz - 6,285,904,271 instructions:u # 1.91 insn per cycle - 3.749298206 seconds time elapsed +TOTAL : 3.411200 sec + 3,298,198,905 cycles:u # 0.894 GHz + 6,437,791,880 instructions:u # 1.95 insn per cycle + 3.744662399 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -81,14 +81,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.741144e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.742963e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.742963e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.729424e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.731224e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.731224e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.426721 sec - 25,188,402,557 cycles:u # 2.671 GHz - 78,683,975,488 instructions:u # 3.12 insn per cycle - 9.434347199 seconds time elapsed +TOTAL : 9.490494 sec + 25,366,778,555 cycles:u # 2.672 GHz + 78,685,859,504 instructions:u # 3.10 insn per cycle + 9.499182354 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4794) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -106,15 +106,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.399297e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.406264e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.406264e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.405502e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.412528e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.412528e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.832573 sec - 12,897,781,423 cycles:u # 2.667 GHz - 39,208,190,885 instructions:u # 3.04 insn per cycle - 4.840977045 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13081) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.823257 sec + 12,891,894,627 cycles:u # 2.670 GHz + 39,208,936,142 instructions:u # 3.04 insn per cycle + 4.831007668 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13087) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -131,15 +131,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.862707e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.891099e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.891099e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.724710e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.751508e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.751508e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.397288 sec - 5,448,194,740 cycles:u # 2.268 GHz - 13,805,009,057 instructions:u # 2.53 insn per cycle - 2.404871894 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10947) (512y: 0) (512z: 0) +TOTAL : 2.446362 sec + 5,560,869,204 cycles:u # 2.270 GHz + 13,805,381,671 instructions:u # 2.48 insn per cycle + 2.454456996 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10955) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -156,15 +156,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.727583e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.762921e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.762921e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.735784e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.771365e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.771365e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.129820 sec - 4,842,227,147 cycles:u # 2.269 GHz - 12,450,830,612 instructions:u # 2.57 insn per cycle - 2.137425210 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10638) (512y: 29) (512z: 0) +TOTAL : 2.127553 sec + 4,840,299,510 cycles:u # 2.270 GHz + 12,451,203,185 instructions:u # 2.57 insn per cycle + 2.135394286 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10646) (512y: 29) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -181,15 +181,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.437009e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.461927e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.461927e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.449537e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.474317e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.474317e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.555429 sec - 4,034,323,777 cycles:u # 1.577 GHz - 6,339,399,665 instructions:u # 1.57 insn per cycle - 2.563146515 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1361) (512y: 69) (512z: 9961) +TOTAL : 2.550627 sec + 4,031,996,178 cycles:u # 1.578 GHz + 6,339,586,420 instructions:u # 1.57 insn per cycle + 2.558565019 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1363) (512y: 69) (512z: 9967) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 3365c6d659..8bb3c185d7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_04:20:34 +DATE: 2022-12-18_08:32:20 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.464626e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.493230e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.495493e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.443969e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.472752e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.475118e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.539980 sec - 177,394,473 cycles:u # 0.227 GHz - 273,312,906 instructions:u # 1.54 insn per cycle - 0.938229604 seconds time elapsed +TOTAL : 0.540785 sec + 178,446,276 cycles:u # 0.230 GHz + 270,420,249 instructions:u # 1.52 insn per cycle + 0.837439922 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.133983e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.168421e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.169892e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.141953e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.175310e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176691e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.068155 sec - 2,313,216,136 cycles:u # 0.692 GHz - 4,600,504,151 instructions:u # 1.99 insn per cycle - 3.399381783 seconds time elapsed +TOTAL : 3.062074 sec + 2,298,898,169 cycles:u # 0.690 GHz + 4,580,568,690 instructions:u # 1.99 insn per cycle + 3.390849758 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.736995e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.738819e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.738819e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.741502e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.743321e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.743321e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 9.448895 sec - 25,235,730,328 cycles:u # 2.670 GHz - 78,422,889,524 instructions:u # 3.11 insn per cycle - 9.513053140 seconds time elapsed +TOTAL : 9.442019 sec + 25,202,460,531 cycles:u # 2.673 GHz + 78,424,773,823 instructions:u # 3.11 insn per cycle + 9.449692449 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4125) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.392916e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.399863e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.399863e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.404635e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.411806e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.411806e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.840757 sec - 12,926,250,950 cycles:u # 2.668 GHz - 39,155,083,676 instructions:u # 3.03 insn per cycle - 4.925509218 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12851) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.824205 sec + 12,887,542,367 cycles:u # 2.670 GHz + 39,155,829,363 instructions:u # 3.04 insn per cycle + 4.832385006 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:12857) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.725090e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.751976e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.751976e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.699678e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.727002e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.727002e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.445905 sec - 5,556,372,058 cycles:u # 2.268 GHz - 13,893,601,857 instructions:u # 2.50 insn per cycle - 2.545450832 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11049) (512y: 0) (512z: 0) +TOTAL : 2.455195 sec + 5,582,540,439 cycles:u # 2.270 GHz + 13,893,974,154 instructions:u # 2.49 insn per cycle + 2.463055043 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.524769e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.558480e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.558480e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.604549e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.638881e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.638881e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.186767 sec - 4,970,492,448 cycles:u # 2.267 GHz - 12,546,770,598 instructions:u # 2.52 insn per cycle - 2.280401432 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10664) (512y: 180) (512z: 0) +TOTAL : 2.163930 sec + 4,922,812,259 cycles:u # 2.270 GHz + 12,547,143,353 instructions:u # 2.55 insn per cycle + 2.171867479 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10672) (512y: 180) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.420788e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.445242e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.445242e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.417924e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.442418e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.442418e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.561645 sec - 4,044,399,243 cycles:u # 1.577 GHz - 6,432,673,597 instructions:u # 1.59 insn per cycle - 2.671015895 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1278) (512y: 169) (512z:10063) +TOTAL : 2.562789 sec + 4,050,565,382 cycles:u # 1.578 GHz + 6,432,859,666 instructions:u # 1.59 insn per cycle + 2.570582115 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1280) (512y: 169) (512z:10069) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index bf8732eb2a..1b3c302f8f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_04:44:03 +DATE: 2022-12-18_08:55:41 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.192066e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.220244e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.222342e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.231974e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.260514e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.262630e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.549082 sec - 188,987,060 cycles:u # 0.241 GHz - 290,232,759 instructions:u # 1.54 insn per cycle - 0.844177431 seconds time elapsed +TOTAL : 0.550638 sec + 186,604,707 cycles:u # 0.237 GHz + 288,514,515 instructions:u # 1.55 insn per cycle + 0.844537946 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.729751e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.757709e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.758897e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.759531e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.788093e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.789311e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.322600 sec - 2,545,314,839 cycles:u # 0.708 GHz - 5,070,626,699 instructions:u # 1.99 insn per cycle - 3.650755644 seconds time elapsed +TOTAL : 3.309194 sec + 2,569,702,803 cycles:u # 0.716 GHz + 5,227,451,318 instructions:u # 2.03 insn per cycle + 3.643965589 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 @@ -79,15 +79,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.895274e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.896194e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.896194e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.857944e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.858847e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.858847e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 42.110378 sec - 112,335,196,052 cycles:u # 2.667 GHz - 145,295,468,366 instructions:u # 1.29 insn per cycle - 42.118262923 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:22147) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 42.517529 sec + 113,385,396,798 cycles:u # 2.667 GHz + 145,387,389,363 instructions:u # 1.28 insn per cycle + 42.525239345 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:22074) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.996265e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.001790e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.001790e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.002158e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.007647e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.007647e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.481659 sec - 14,613,398,338 cycles:u # 2.664 GHz - 37,421,821,501 instructions:u # 2.56 insn per cycle - 5.489706839 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:68019) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.470676 sec + 14,610,776,841 cycles:u # 2.669 GHz + 37,422,509,771 instructions:u # 2.56 insn per cycle + 5.478727672 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:68022) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.172380e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.195168e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.195168e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.209338e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.232320e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.232320e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.665091 sec - 6,052,342,322 cycles:u # 2.267 GHz - 12,909,449,554 instructions:u # 2.13 insn per cycle - 2.673441738 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:46348) (512y: 0) (512z: 0) +TOTAL : 2.649238 sec + 6,017,256,063 cycles:u # 2.267 GHz + 12,909,792,898 instructions:u # 2.15 insn per cycle + 2.657138577 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:46352) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.494055e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.527740e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.527740e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.516859e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.551430e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.551430e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.196376 sec - 4,983,318,118 cycles:u # 2.265 GHz - 11,317,884,572 instructions:u # 2.27 insn per cycle - 2.204472930 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40040) (512y: 192) (512z: 0) +TOTAL : 2.189839 sec + 4,979,827,254 cycles:u # 2.269 GHz + 11,318,228,539 instructions:u # 2.27 insn per cycle + 2.197641081 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40044) (512y: 192) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.633791e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.660193e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.660193e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.617004e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.643586e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.643586e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.480552 sec - 3,913,154,020 cycles:u # 1.574 GHz - 5,779,228,395 instructions:u # 1.48 insn per cycle - 2.488423388 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1935) (512y: 316) (512z:38917) +TOTAL : 2.486812 sec + 3,923,930,030 cycles:u # 1.575 GHz + 5,779,397,983 instructions:u # 1.47 insn per cycle + 2.494865921 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1936) (512y: 316) (512z:38920) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index d0a8e5ab1f..b54df87a51 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_04:45:23 +DATE: 2022-12-18_08:57:01 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.240118e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.269635e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.271891e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.233971e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.261372e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.263540e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.547394 sec - 187,726,660 cycles:u # 0.240 GHz - 282,460,740 instructions:u # 1.50 insn per cycle - 0.842651592 seconds time elapsed +TOTAL : 0.547531 sec + 187,559,035 cycles:u # 0.239 GHz + 288,753,463 instructions:u # 1.54 insn per cycle + 0.842600896 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.731585e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.758984e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.760143e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.720073e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.748054e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.749246e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.321455 sec - 2,610,046,603 cycles:u # 0.727 GHz - 5,176,139,122 instructions:u # 1.98 insn per cycle - 3.649260918 seconds time elapsed +TOTAL : 3.332062 sec + 2,480,062,932 cycles:u # 0.688 GHz + 5,155,402,965 instructions:u # 2.08 insn per cycle + 3.660838832 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.831715e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.832601e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.832601e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.828943e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.829828e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.829828e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 42.808281 sec - 114,164,075,491 cycles:u # 2.667 GHz - 145,241,151,537 instructions:u # 1.27 insn per cycle - 42.816096438 seconds time elapsed +TOTAL : 42.839359 sec + 114,234,931,292 cycles:u # 2.667 GHz + 145,242,806,786 instructions:u # 1.27 insn per cycle + 42.848360132 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:22127) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.936373e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.941624e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.941624e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.933818e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.939100e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.939100e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.592820 sec - 14,929,425,241 cycles:u # 2.667 GHz - 37,673,467,090 instructions:u # 2.52 insn per cycle - 5.600850962 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:68466) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.597704 sec + 14,942,665,146 cycles:u # 2.667 GHz + 37,674,155,089 instructions:u # 2.52 insn per cycle + 5.605661064 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:68469) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.296531e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.320159e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.320159e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.291072e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.314910e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.314910e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.612338 sec - 5,930,947,135 cycles:u # 2.267 GHz - 12,795,832,013 instructions:u # 2.16 insn per cycle - 2.620152681 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45644) (512y: 0) (512z: 0) +TOTAL : 2.615405 sec + 5,936,713,010 cycles:u # 2.266 GHz + 12,796,176,695 instructions:u # 2.16 insn per cycle + 2.623256768 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45648) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.471462e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.504827e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.504827e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.459246e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.492414e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.492414e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.202575 sec - 5,002,167,854 cycles:u # 2.266 GHz - 11,319,112,437 instructions:u # 2.26 insn per cycle - 2.210664136 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:39801) (512y: 140) (512z: 0) +TOTAL : 2.206280 sec + 5,012,199,003 cycles:u # 2.267 GHz + 11,319,456,699 instructions:u # 2.26 insn per cycle + 2.214221921 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:39805) (512y: 140) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.650691e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.677093e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.677093e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.645876e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.672251e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.672251e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.473840 sec - 3,902,413,814 cycles:u # 1.575 GHz - 5,755,178,184 instructions:u # 1.47 insn per cycle - 2.482267606 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1590) (512y: 250) (512z:38627) +TOTAL : 2.475699 sec + 3,904,043,223 cycles:u # 1.574 GHz + 5,755,347,965 instructions:u # 1.47 insn per cycle + 2.483672410 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1591) (512y: 250) (512z:38630) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index c22fbb64bf..5e272a4f25 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_04:21:18 +DATE: 2022-12-18_08:33:03 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.244106e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.294731e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.300659e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.260734e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.315129e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.320717e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.495947 sec - 144,987,135 cycles:u # 0.201 GHz - 199,415,654 instructions:u # 1.38 insn per cycle - 0.892434696 seconds time elapsed +TOTAL : 0.495971 sec + 141,745,260 cycles:u # 0.197 GHz + 200,707,558 instructions:u # 1.42 insn per cycle + 0.778784910 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.486683e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.558241e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.561505e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.490192e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.563115e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.566459e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.751522 sec - 1,209,236,676 cycles:u # 0.606 GHz - 2,455,776,551 instructions:u # 2.03 insn per cycle - 2.053919088 seconds time elapsed +TOTAL : 1.750604 sec + 1,216,747,592 cycles:u # 0.610 GHz + 2,422,037,646 instructions:u # 1.99 insn per cycle + 2.055042527 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.798933e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.800471e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.800471e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.797441e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.798981e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.798981e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 9.121947 sec - 24,358,136,875 cycles:u # 2.670 GHz - 78,099,926,680 instructions:u # 3.21 insn per cycle - 9.173558037 seconds time elapsed +TOTAL : 9.155329 sec + 24,399,379,221 cycles:u # 2.672 GHz + 78,102,007,537 instructions:u # 3.20 insn per cycle + 9.162614409 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3550) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.005196e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.029060e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.029060e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.007990e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.031945e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.031945e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 2.346505 sec - 6,273,082,186 cycles:u # 2.668 GHz - 20,067,928,793 instructions:u # 3.20 insn per cycle - 2.460263538 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13737) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.345722 sec + 6,268,132,183 cycles:u # 2.667 GHz + 20,068,334,013 instructions:u # 3.20 insn per cycle + 2.353212019 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13743) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.347252e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.356159e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.356159e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.363479e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.372492e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.372492e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.222724 sec - 2,783,322,182 cycles:u # 2.267 GHz - 7,016,234,107 instructions:u # 2.52 insn per cycle - 1.354991161 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11231) (512y: 0) (512z: 0) +TOTAL : 1.208443 sec + 2,750,404,600 cycles:u # 2.267 GHz + 7,016,445,423 instructions:u # 2.55 insn per cycle + 1.216111398 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.527879e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.539259e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.539259e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.528339e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.539776e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.539776e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.078890 sec - 2,454,293,601 cycles:u # 2.264 GHz - 6,313,529,149 instructions:u # 2.57 insn per cycle - 1.197556296 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10883) (512y: 32) (512z: 0) +TOTAL : 1.078665 sec + 2,454,168,448 cycles:u # 2.266 GHz + 6,313,740,361 instructions:u # 2.57 insn per cycle + 1.086312740 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10891) (512y: 32) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.290325e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.298412e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.298412e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.293521e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.301613e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.301613e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.276600 sec - 2,013,929,444 cycles:u # 1.575 GHz - 3,228,703,781 instructions:u # 1.60 insn per cycle - 1.326220177 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1656) (512y: 40) (512z:10079) +TOTAL : 1.273631 sec + 2,018,918,767 cycles:u # 1.579 GHz + 3,228,810,060 instructions:u # 1.60 insn per cycle + 1.281261848 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1658) (512y: 40) (512z:10085) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 70c84b827f..8852581af2 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_04:55:08 +DATE: 2022-12-18_09:06:45 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +47,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.372028e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.227037e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.227037e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.355069e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.215338e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.215338e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.482588 sec - 119,898,004 cycles:u # 0.171 GHz - 191,961,030 instructions:u # 1.60 insn per cycle - 0.760433266 seconds time elapsed +TOTAL : 0.484225 sec + 121,196,126 cycles:u # 0.172 GHz + 192,566,647 instructions:u # 1.59 insn per cycle + 0.762552797 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -66,14 +66,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.917551e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.474354e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.474354e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.891588e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.442792e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.442792e+05 ) sec^-1 MeanMatrixElemValue = ( 6.473419e+00 +- 4.973148e+00 ) GeV^-4 -TOTAL : 1.953445 sec - 1,728,752,614 cycles:u # 0.785 GHz - 3,407,717,243 instructions:u # 1.97 insn per cycle - 2.261109474 seconds time elapsed +TOTAL : 1.955241 sec + 1,672,615,112 cycles:u # 0.759 GHz + 3,457,978,165 instructions:u # 2.07 insn per cycle + 2.263700436 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -88,14 +88,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.799944e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.801495e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.801495e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.800279e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.801801e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.801801e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 9.119605 sec - 24,361,581,256 cycles:u # 2.670 GHz - 78,102,105,613 instructions:u # 3.21 insn per cycle - 9.127319791 seconds time elapsed +TOTAL : 9.117726 sec + 24,375,871,547 cycles:u # 2.672 GHz + 78,104,186,451 instructions:u # 3.20 insn per cycle + 9.125678371 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3550) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -114,15 +114,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.998174e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.022593e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.022593e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.006527e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.030835e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.030835e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 2.351952 sec - 6,277,150,815 cycles:u # 2.663 GHz - 20,074,406,169 instructions:u # 3.20 insn per cycle - 2.359887095 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13737) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.349200 sec + 6,274,676,472 cycles:u # 2.665 GHz + 20,074,811,775 instructions:u # 3.20 insn per cycle + 2.357189867 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13743) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -140,15 +140,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.349590e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.358614e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.358614e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.363440e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.372595e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.372595e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.223840 sec - 2,780,268,595 cycles:u # 2.263 GHz - 7,023,299,549 instructions:u # 2.53 insn per cycle - 1.231749403 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11231) (512y: 0) (512z: 0) +TOTAL : 1.211169 sec + 2,753,823,906 cycles:u # 2.265 GHz + 7,023,510,891 instructions:u # 2.55 insn per cycle + 1.218894510 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.515588e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.527004e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.527004e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.529606e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.541037e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.541037e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.090562 sec - 2,460,070,430 cycles:u # 2.245 GHz - 6,320,594,698 instructions:u # 2.57 insn per cycle - 1.098581832 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10883) (512y: 32) (512z: 0) +TOTAL : 1.080591 sec + 2,454,273,613 cycles:u # 2.262 GHz + 6,320,805,568 instructions:u # 2.58 insn per cycle + 1.088380976 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10891) (512y: 32) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -192,15 +192,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.293353e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.301484e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.301484e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.291873e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.300155e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.300155e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.276738 sec - 2,018,769,191 cycles:u # 1.575 GHz - 3,236,269,540 instructions:u # 1.60 insn per cycle - 1.284815123 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1656) (512y: 40) (512z:10079) +TOTAL : 1.278153 sec + 2,023,327,191 cycles:u # 1.577 GHz + 3,236,375,646 instructions:u # 1.60 insn per cycle + 1.286309352 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1658) (512y: 40) (512z:10085) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index eb8b750e2f..93b26b54ed 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_05:08:34 +DATE: 2022-12-18_09:20:09 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.268861e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.321970e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.327551e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.246623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.301424e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.306541e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.479525 sec - 111,407,269 cycles:u # 0.160 GHz - 170,141,760 instructions:u # 1.53 insn per cycle - 0.756680064 seconds time elapsed +TOTAL : 0.477826 sec + 110,284,172 cycles:u # 0.158 GHz + 172,386,904 instructions:u # 1.56 insn per cycle + 0.755075037 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.562585e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.628452e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.631402e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.558377e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.628290e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.631501e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.849933 sec - 1,440,731,581 cycles:u # 0.691 GHz - 2,799,914,608 instructions:u # 1.94 insn per cycle - 2.144791188 seconds time elapsed +TOTAL : 2.032441 sec + 1,441,953,626 cycles:u # 0.670 GHz + 2,754,367,532 instructions:u # 1.91 insn per cycle + 2.327685344 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.797567e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.799086e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.799086e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.795675e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.797192e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.797192e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208454e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 9.130765 sec - 24,400,148,194 cycles:u # 2.671 GHz - 78,099,578,171 instructions:u # 3.20 insn per cycle - 9.138984551 seconds time elapsed +TOTAL : 9.140261 sec + 24,422,366,224 cycles:u # 2.672 GHz + 78,101,659,110 instructions:u # 3.20 insn per cycle + 9.147886665 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3550) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.998360e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.022283e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.022283e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.006144e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.030398e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.030398e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208453e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.350658 sec - 6,274,233,954 cycles:u # 2.665 GHz - 20,067,040,118 instructions:u # 3.20 insn per cycle - 2.357953085 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13737) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.348093 sec + 6,274,411,968 cycles:u # 2.667 GHz + 20,067,445,791 instructions:u # 3.20 insn per cycle + 2.355360044 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13743) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.349567e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.358499e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.358499e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.315640e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.324085e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.324085e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214974e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.222463 sec - 2,780,077,018 cycles:u # 2.266 GHz - 7,014,476,360 instructions:u # 2.52 insn per cycle - 1.229819702 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11231) (512y: 0) (512z: 0) +TOTAL : 1.253834 sec + 2,848,924,867 cycles:u # 2.265 GHz + 7,014,687,224 instructions:u # 2.46 insn per cycle + 1.261173750 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.524451e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.535960e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.535960e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.532427e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.543946e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.543946e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214974e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.083867 sec - 2,458,463,556 cycles:u # 2.262 GHz - 6,310,722,895 instructions:u # 2.57 insn per cycle - 1.091331717 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10883) (512y: 32) (512z: 0) +TOTAL : 1.077602 sec + 2,451,688,236 cycles:u # 2.266 GHz + 6,310,934,034 instructions:u # 2.57 insn per cycle + 1.084848167 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10891) (512y: 32) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.293974e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.302203e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.302203e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.292989e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.301356e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.301356e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214976e-01 +- 3.255524e-01 ) GeV^-4 -TOTAL : 1.275023 sec - 2,018,745,162 cycles:u # 1.578 GHz - 3,225,897,457 instructions:u # 1.60 insn per cycle - 1.282476041 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1656) (512y: 40) (512z:10079) +TOTAL : 1.275916 sec + 2,020,275,483 cycles:u # 1.579 GHz + 3,226,004,098 instructions:u # 1.60 insn per cycle + 1.283258922 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1658) (512y: 40) (512z:10085) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 54d2eda591..516ba0b0b6 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_05:04:53 +DATE: 2022-12-18_09:16:29 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.264772e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.317730e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.322812e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.257910e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.310785e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.316827e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.477032 sec - 106,779,298 cycles:u # 0.153 GHz - 169,921,826 instructions:u # 1.59 insn per cycle - 0.754477504 seconds time elapsed +TOTAL : 0.477660 sec + 108,515,508 cycles:u # 0.156 GHz + 169,503,254 instructions:u # 1.56 insn per cycle + 0.754717642 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.477393e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.544883e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.547951e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.558042e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.627832e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.630970e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.776229 sec - 1,324,280,580 cycles:u # 0.654 GHz - 2,861,570,915 instructions:u # 2.16 insn per cycle - 2.082205197 seconds time elapsed +TOTAL : 1.778708 sec + 1,290,078,725 cycles:u # 0.640 GHz + 2,804,807,184 instructions:u # 2.17 insn per cycle + 2.074413994 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.798178e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.799697e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.799697e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.799611e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.801132e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.801132e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 9.125847 sec - 24,371,826,035 cycles:u # 2.669 GHz - 78,099,927,644 instructions:u # 3.20 insn per cycle - 9.135156080 seconds time elapsed +TOTAL : 9.118622 sec + 24,375,326,522 cycles:u # 2.672 GHz + 78,102,008,293 instructions:u # 3.20 insn per cycle + 9.126098739 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3550) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.962158e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.986253e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.986253e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.008707e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.032649e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.032649e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 2.360979 sec - 6,306,146,744 cycles:u # 2.667 GHz - 20,067,929,566 instructions:u # 3.18 insn per cycle - 2.368507239 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13737) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.345446 sec + 6,267,713,687 cycles:u # 2.667 GHz + 20,068,334,748 instructions:u # 3.20 insn per cycle + 2.352915013 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13743) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.348315e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.357131e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.357131e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.364054e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.373272e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.373272e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.221905 sec - 2,778,135,966 cycles:u # 2.266 GHz - 7,016,234,877 instructions:u # 2.53 insn per cycle - 1.229435389 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11231) (512y: 0) (512z: 0) +TOTAL : 1.207674 sec + 2,749,728,852 cycles:u # 2.268 GHz + 7,016,445,976 instructions:u # 2.55 insn per cycle + 1.214983198 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.527369e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.538857e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.538857e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.531265e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.542614e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.542614e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.079314 sec - 2,455,120,620 cycles:u # 2.266 GHz - 6,313,530,102 instructions:u # 2.57 insn per cycle - 1.086759842 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10883) (512y: 32) (512z: 0) +TOTAL : 1.076432 sec + 2,448,976,258 cycles:u # 2.266 GHz + 6,313,741,295 instructions:u # 2.58 insn per cycle + 1.084110921 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10891) (512y: 32) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.294105e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.302318e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.302318e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.294902e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.303000e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.303000e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.273058 sec - 2,015,862,853 cycles:u # 1.578 GHz - 3,228,704,358 instructions:u # 1.60 insn per cycle - 1.280456041 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1656) (512y: 40) (512z:10079) +TOTAL : 1.272161 sec + 2,016,959,435 cycles:u # 1.580 GHz + 3,228,810,942 instructions:u # 1.60 insn per cycle + 1.279785333 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1658) (512y: 40) (512z:10085) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 017026ac46..decfb37da2 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_05:01:20 +DATE: 2022-12-18_09:12:55 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.487898e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.309915e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.315295e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.480620e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.295200e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.300604e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.480283 sec - 115,443,132 cycles:u # 0.165 GHz - 187,808,256 instructions:u # 1.63 insn per cycle - 0.757175550 seconds time elapsed +TOTAL : 0.483358 sec + 117,059,427 cycles:u # 0.167 GHz + 186,004,326 instructions:u # 1.59 insn per cycle + 0.760915293 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -60,14 +60,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.075182e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.543260e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.546353e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.126569e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.627488e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.630628e+05 ) sec^-1 MeanMatrixElemValue = ( 6.473419e+00 +- 4.973148e+00 ) GeV^-4 -TOTAL : 2.104287 sec - 1,635,407,696 cycles:u # 0.760 GHz - 3,323,643,955 instructions:u # 2.03 insn per cycle - 2.399448813 seconds time elapsed +TOTAL : 1.900217 sec + 1,632,706,476 cycles:u # 0.764 GHz + 3,322,733,665 instructions:u # 2.04 insn per cycle + 2.195762189 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -81,14 +81,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.797866e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.799383e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.799383e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.799109e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.800631e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.800631e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 9.127402 sec - 24,386,837,977 cycles:u # 2.671 GHz - 78,099,927,934 instructions:u # 3.20 insn per cycle - 9.135010006 seconds time elapsed +TOTAL : 9.121149 sec + 24,388,780,862 cycles:u # 2.673 GHz + 78,102,008,366 instructions:u # 3.20 insn per cycle + 9.128646084 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3550) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -106,15 +106,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.999878e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.024145e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.024145e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.967372e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.991038e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.991038e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 2.348385 sec - 6,273,457,041 cycles:u # 2.666 GHz - 20,067,929,743 instructions:u # 3.20 insn per cycle - 2.355736729 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13737) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.359347 sec + 6,305,372,793 cycles:u # 2.667 GHz + 20,068,335,344 instructions:u # 3.18 insn per cycle + 2.366847552 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13743) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -131,15 +131,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.347905e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.356670e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.356670e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.362013e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.370978e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.370978e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.222179 sec - 2,779,240,168 cycles:u # 2.266 GHz - 7,016,235,221 instructions:u # 2.52 insn per cycle - 1.229633625 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11231) (512y: 0) (512z: 0) +TOTAL : 1.209522 sec + 2,753,478,397 cycles:u # 2.268 GHz + 7,016,446,634 instructions:u # 2.55 insn per cycle + 1.217210377 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -156,15 +156,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.525971e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.537409e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.537409e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.531282e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.542693e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.542693e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.080238 sec - 2,458,994,717 cycles:u # 2.267 GHz - 6,313,530,213 instructions:u # 2.57 insn per cycle - 1.087568573 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10883) (512y: 32) (512z: 0) +TOTAL : 1.076455 sec + 2,450,285,758 cycles:u # 2.267 GHz + 6,313,741,367 instructions:u # 2.58 insn per cycle + 1.083884697 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10891) (512y: 32) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -181,15 +181,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.294273e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.302425e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.302425e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.294203e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.302481e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.302481e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.272854 sec - 2,014,850,894 cycles:u # 1.578 GHz - 3,228,704,625 instructions:u # 1.60 insn per cycle - 1.280357996 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1656) (512y: 40) (512z:10079) +TOTAL : 1.272988 sec + 2,016,552,410 cycles:u # 1.579 GHz + 3,228,811,260 instructions:u # 1.60 insn per cycle + 1.280396637 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1658) (512y: 40) (512z:10085) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 6d1aab9b64..55428ba5bf 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_04:21:53 +DATE: 2022-12-18_08:33:36 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.294907e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.346241e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.351945e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.276393e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.332149e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.337299e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.497260 sec - 141,521,442 cycles:u # 0.196 GHz - 201,058,468 instructions:u # 1.42 insn per cycle - 1.000510218 seconds time elapsed +TOTAL : 0.495544 sec + 143,094,604 cycles:u # 0.199 GHz + 202,910,024 instructions:u # 1.42 insn per cycle + 0.778485858 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.509639e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.581524e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.584672e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.480104e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.552215e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.555544e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.748073 sec - 1,154,020,001 cycles:u # 0.578 GHz - 2,360,198,031 instructions:u # 2.05 insn per cycle - 2.053396965 seconds time elapsed +TOTAL : 1.748175 sec + 1,204,478,472 cycles:u # 0.601 GHz + 2,345,864,946 instructions:u # 1.95 insn per cycle + 2.062573413 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.804639e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.806175e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.806175e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.808551e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.810083e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.810083e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 9.092837 sec - 24,302,787,009 cycles:u # 2.671 GHz - 77,854,583,560 instructions:u # 3.20 insn per cycle - 9.131484015 seconds time elapsed +TOTAL : 9.084609 sec + 24,269,747,617 cycles:u # 2.674 GHz + 77,856,664,102 instructions:u # 3.21 insn per cycle + 9.092053139 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.059546e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.083807e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.083807e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.070413e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.094612e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.094612e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 2.328158 sec - 6,218,628,905 cycles:u # 2.666 GHz - 20,021,820,015 instructions:u # 3.22 insn per cycle - 2.424654311 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13429) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.324604 sec + 6,213,374,026 cycles:u # 2.668 GHz + 20,022,225,366 instructions:u # 3.22 insn per cycle + 2.332270143 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13435) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.306595e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.314976e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.314976e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.321103e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.329548e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329548e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.260367 sec - 2,868,099,594 cycles:u # 2.266 GHz - 7,128,444,148 instructions:u # 2.49 insn per cycle - 1.331356368 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11811) (512y: 0) (512z: 0) +TOTAL : 1.246661 sec + 2,833,278,101 cycles:u # 2.266 GHz + 7,128,655,172 instructions:u # 2.52 insn per cycle + 1.254469136 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11819) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.478079e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.488646e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.488646e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.474669e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.485425e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.485425e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.114695 sec - 2,537,118,402 cycles:u # 2.266 GHz - 6,423,903,240 instructions:u # 2.53 insn per cycle - 1.169097494 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11495) (512y: 24) (512z: 0) +TOTAL : 1.117360 sec + 2,541,860,976 cycles:u # 2.266 GHz + 6,424,114,276 instructions:u # 2.53 insn per cycle + 1.125042230 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11503) (512y: 24) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.255543e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.263202e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.263202e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.257306e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.264984e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.264984e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.311607 sec - 2,076,344,019 cycles:u # 1.578 GHz - 3,350,305,870 instructions:u # 1.61 insn per cycle - 1.340625390 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2219) (512y: 33) (512z:10159) +TOTAL : 1.309892 sec + 2,072,915,832 cycles:u # 1.577 GHz + 3,350,412,330 instructions:u # 1.62 insn per cycle + 1.317386444 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2221) (512y: 33) (512z:10165) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index c55358fd35..6642c9c4a6 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_04:46:43 +DATE: 2022-12-18_08:58:21 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.391476e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.435159e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.439613e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.388720e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.435908e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.440252e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.504351 sec - 149,456,425 cycles:u # 0.203 GHz - 210,880,146 instructions:u # 1.41 insn per cycle - 0.797469128 seconds time elapsed +TOTAL : 0.504876 sec + 148,083,629 cycles:u # 0.201 GHz + 213,589,913 instructions:u # 1.44 insn per cycle + 0.795879503 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.594379e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.652104e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.654741e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.587824e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.645497e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.648162e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.905152 sec - 1,354,093,384 cycles:u # 0.626 GHz - 2,711,648,673 instructions:u # 2.00 insn per cycle - 2.222076137 seconds time elapsed +TOTAL : 1.906391 sec + 1,330,278,786 cycles:u # 0.615 GHz + 2,684,324,462 instructions:u # 2.02 insn per cycle + 2.220186204 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 @@ -79,15 +79,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.039068e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.040265e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.040265e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.085176e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.086391e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.086391e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 32.552303 sec - 86,804,381,764 cycles:u # 2.666 GHz - 135,773,928,524 instructions:u # 1.56 insn per cycle - 32.559826081 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:16004) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 32.256596 sec + 86,122,086,681 cycles:u # 2.670 GHz + 136,414,789,102 instructions:u # 1.58 insn per cycle + 32.264922488 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15760) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -95,8 +95,8 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627519e-04 -Avg ME (F77/C++) = 6.6275193157177420E-004 -Relative difference = 4.76373951796607e-08 +Avg ME (F77/C++) = 6.6275193244435399E-004 +Relative difference = 4.895399613728019e-08 OK (relative difference <= 2E-4) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe -p 64 256 1 OMP= @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.594852e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.616049e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.616049e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.606241e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.627512e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.627512e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.492638 sec - 6,648,308,341 cycles:u # 2.662 GHz - 19,289,377,972 instructions:u # 2.90 insn per cycle - 2.500187602 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:69682) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.488337 sec + 6,645,743,101 cycles:u # 2.666 GHz + 19,289,725,584 instructions:u # 2.90 insn per cycle + 2.495904153 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:69685) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.216915e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.224191e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.224191e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.210351e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.217522e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.217522e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.353608 sec - 3,076,224,318 cycles:u # 2.265 GHz - 6,642,703,704 instructions:u # 2.16 insn per cycle - 1.361105199 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:47775) (512y: 0) (512z: 0) +TOTAL : 1.360850 sec + 3,090,657,719 cycles:u # 2.265 GHz + 6,642,867,341 instructions:u # 2.15 insn per cycle + 1.368320763 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:47779) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.455895e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.466183e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.466183e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.464338e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.474724e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.474724e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.132285 sec - 2,557,293,914 cycles:u # 2.249 GHz - 5,837,006,540 instructions:u # 2.28 insn per cycle - 1.139978421 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:41537) (512y: 16) (512z: 0) +TOTAL : 1.125785 sec + 2,560,155,165 cycles:u # 2.264 GHz + 5,837,170,773 instructions:u # 2.28 insn per cycle + 1.133625211 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:41541) (512y: 16) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.292239e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.300360e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.300360e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.299202e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.307427e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.307427e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.275237 sec - 2,015,191,468 cycles:u # 1.574 GHz - 3,355,793,093 instructions:u # 1.67 insn per cycle - 1.282642894 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4188) (512y: 5) (512z:44224) +TOTAL : 1.268411 sec + 2,008,450,242 cycles:u # 1.577 GHz + 3,355,878,050 instructions:u # 1.67 insn per cycle + 1.276396133 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4189) (512y: 5) (512z:44227) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index ce4f803623..f6e9e9af7c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2022-12-17_04:47:43 +DATE: 2022-12-18_08:59:21 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.362099e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.405342e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.409631e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.373501e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.412537e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.416883e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.503910 sec - 147,500,052 cycles:u # 0.200 GHz - 212,266,509 instructions:u # 1.44 insn per cycle - 0.796134930 seconds time elapsed +TOTAL : 0.504231 sec + 149,592,325 cycles:u # 0.203 GHz + 215,339,315 instructions:u # 1.44 insn per cycle + 0.795314352 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.489312e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.546517e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.549130e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.537361e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.594547e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.597133e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.908301 sec - 1,349,192,042 cycles:u # 0.625 GHz - 2,715,093,738 instructions:u # 2.01 insn per cycle - 2.215349781 seconds time elapsed +TOTAL : 1.901244 sec + 1,320,741,462 cycles:u # 0.614 GHz + 2,650,875,879 instructions:u # 2.01 insn per cycle + 2.208378777 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.979686e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.980850e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.980850e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.085234e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.086448e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.086448e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059968e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 32.939491 sec - 87,819,322,903 cycles:u # 2.666 GHz - 135,809,876,969 instructions:u # 1.55 insn per cycle - 32.947336887 seconds time elapsed +TOTAL : 32.256535 sec + 86,111,644,127 cycles:u # 2.669 GHz + 135,811,530,440 instructions:u # 1.58 insn per cycle + 32.264023154 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:15806) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.500650e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.521189e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.521189e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.488160e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.508692e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.508692e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.528251 sec - 6,750,438,004 cycles:u # 2.665 GHz - 19,292,263,352 instructions:u # 2.86 insn per cycle - 2.535697006 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:69451) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.533010 sec + 6,759,683,804 cycles:u # 2.664 GHz + 19,292,611,556 instructions:u # 2.85 insn per cycle + 2.540668821 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:69454) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.234775e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.242156e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.242156e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.237543e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.245219e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.245219e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.333667 sec - 3,028,123,553 cycles:u # 2.264 GHz - 6,580,611,889 instructions:u # 2.17 insn per cycle - 1.341220399 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:46800) (512y: 0) (512z: 0) +TOTAL : 1.330565 sec + 3,024,153,048 cycles:u # 2.265 GHz + 6,580,775,585 instructions:u # 2.18 insn per cycle + 1.338252383 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:46804) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.462667e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.473052e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.473052e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.461441e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.471839e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.471839e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.126586 sec - 2,562,369,285 cycles:u # 2.265 GHz - 5,841,958,938 instructions:u # 2.28 insn per cycle - 1.134293757 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:41083) (512y: 11) (512z: 0) +TOTAL : 1.127658 sec + 2,564,890,109 cycles:u # 2.265 GHz + 5,842,122,654 instructions:u # 2.28 insn per cycle + 1.135539285 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:41087) (512y: 11) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.297801e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.305966e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.305966e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.307498e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.315891e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.315891e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.269257 sec - 2,005,204,247 cycles:u # 1.575 GHz - 3,065,529,688 instructions:u # 1.53 insn per cycle - 1.277007358 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3360) (512y: 17) (512z:39418) +TOTAL : 1.259958 sec + 1,995,035,546 cycles:u # 1.578 GHz + 3,065,615,391 instructions:u # 1.54 insn per cycle + 1.267502429 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 17) (512z:39421) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 6d441191a6..ecd4b88a3f 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2022-12-17_04:22:29 +DATE: 2022-12-18_08:34:09 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.020190e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.020598e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.020768e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.017564e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.018064e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.018222e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.560122 sec - 1,882,998,681 cycles:u # 0.669 GHz - 3,715,906,704 instructions:u # 1.97 insn per cycle - 3.073662862 seconds time elapsed +TOTAL : 2.492607 sec + 1,892,214,160 cycles:u # 0.669 GHz + 3,801,177,773 instructions:u # 2.01 insn per cycle + 2.885437320 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.253306e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.255519e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.255792e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.298859e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.301147e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.301446e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.031974 sec - 3,247,085,645 cycles:u # 0.743 GHz - 6,512,394,113 instructions:u # 2.01 insn per cycle - 4.428476685 seconds time elapsed +TOTAL : 4.027616 sec + 3,145,684,019 cycles:u # 0.720 GHz + 6,472,961,541 instructions:u # 2.06 insn per cycle + 4.424090314 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.362138e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.362632e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.362632e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.332056e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.332542e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.332542e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.173751 sec - 19,117,648,177 cycles:u # 2.663 GHz - 54,096,719,693 instructions:u # 2.83 insn per cycle - 7.285118884 seconds time elapsed +TOTAL : 7.212817 sec + 19,188,573,781 cycles:u # 2.663 GHz + 54,097,086,776 instructions:u # 2.82 insn per cycle + 7.220950379 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32329) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.417875e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.418062e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.418062e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.433973e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.434154e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.434154e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.725937 sec - 9,928,121,332 cycles:u # 2.661 GHz - 27,049,793,869 instructions:u # 2.72 insn per cycle - 3.876528521 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96327) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.684325 sec + 9,821,610,649 cycles:u # 2.663 GHz + 27,049,940,602 instructions:u # 2.75 insn per cycle + 3.691643212 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96333) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.789021e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.789772e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.789772e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.800029e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.800802e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.800802e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.895304 sec - 4,296,364,224 cycles:u # 2.261 GHz - 9,640,630,424 instructions:u # 2.24 insn per cycle - 1.992622381 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83972) (512y: 0) (512z: 0) +TOTAL : 1.888158 sec + 4,280,464,848 cycles:u # 2.262 GHz + 9,640,703,609 instructions:u # 2.25 insn per cycle + 1.895538819 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83980) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.153845e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.154794e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.154794e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.111344e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.112285e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.112285e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.676252 sec - 3,797,015,147 cycles:u # 2.259 GHz - 8,591,541,832 instructions:u # 2.26 insn per cycle - 1.753765145 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83664) (512y: 30) (512z: 0) +TOTAL : 1.699231 sec + 3,844,821,637 cycles:u # 2.257 GHz + 8,591,614,914 instructions:u # 2.23 insn per cycle + 1.706663975 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83672) (512y: 30) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.102188e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.103281e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.103281e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.110193e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.111311e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.111311e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.704683 sec - 2,669,440,641 cycles:u # 1.562 GHz - 4,310,829,662 instructions:u # 1.61 insn per cycle - 1.823311995 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1845) (512y: 70) (512z:82918) +TOTAL : 1.700746 sec + 2,669,224,199 cycles:u # 1.565 GHz + 4,310,866,390 instructions:u # 1.62 insn per cycle + 1.709579653 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1847) (512y: 70) (512z:82924) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 7cc1480120..2f956f9749 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2022-12-17_04:55:42 +DATE: 2022-12-18_09:07:19 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +47,14 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.019939e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.021981e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.021981e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.018418e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.020424e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.020424e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.515353 sec - 1,865,421,048 cycles:u # 0.660 GHz - 3,723,946,900 instructions:u # 2.00 insn per cycle - 2.905891724 seconds time elapsed +TOTAL : 2.467702 sec + 1,883,637,412 cycles:u # 0.673 GHz + 3,750,534,328 instructions:u # 1.99 insn per cycle + 2.858696147 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -66,14 +66,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.172878e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.248602e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.248602e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.198981e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.275368e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.275368e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.031809 sec - 3,267,003,083 cycles:u # 0.748 GHz - 6,573,638,278 instructions:u # 2.01 insn per cycle - 4.425703503 seconds time elapsed +TOTAL : 4.034631 sec + 3,307,182,005 cycles:u # 0.757 GHz + 6,664,536,237 instructions:u # 2.02 insn per cycle + 4.428545281 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -88,14 +88,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.356943e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.357432e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.357432e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.346851e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.347346e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.347346e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.178972 sec - 19,106,682,690 cycles:u # 2.660 GHz - 54,097,972,683 instructions:u # 2.83 insn per cycle - 7.186028622 seconds time elapsed +TOTAL : 7.188972 sec + 19,163,098,671 cycles:u # 2.664 GHz + 54,097,852,430 instructions:u # 2.82 insn per cycle + 7.196289715 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32329) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe @@ -114,15 +114,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.421814e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.421992e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.421992e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.431129e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.431318e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.431318e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.715846 sec - 9,907,772,077 cycles:u # 2.663 GHz - 27,050,769,015 instructions:u # 2.73 insn per cycle - 3.722871850 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96327) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.691782 sec + 9,844,589,801 cycles:u # 2.663 GHz + 27,050,915,197 instructions:u # 2.75 insn per cycle + 3.699038773 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96333) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -140,15 +140,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.788858e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.789610e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.789610e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.777070e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.777797e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.777797e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.895830 sec - 4,295,903,148 cycles:u # 2.261 GHz - 9,641,517,558 instructions:u # 2.24 insn per cycle - 1.903064714 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83972) (512y: 0) (512z: 0) +TOTAL : 1.903332 sec + 4,311,478,184 cycles:u # 2.261 GHz + 9,641,590,311 instructions:u # 2.24 insn per cycle + 1.910860985 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83980) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.165802e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.166749e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.166749e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.143684e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.144615e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.144615e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.670323 sec - 3,784,031,848 cycles:u # 2.260 GHz - 8,592,428,550 instructions:u # 2.27 insn per cycle - 1.677512890 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83664) (512y: 30) (512z: 0) +TOTAL : 1.682066 sec + 3,814,059,205 cycles:u # 2.262 GHz + 8,592,501,692 instructions:u # 2.25 insn per cycle + 1.689236289 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83672) (512y: 30) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -192,15 +192,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.094530e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.095629e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.095629e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.098332e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.099403e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.099403e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.709855 sec - 2,675,525,751 cycles:u # 1.561 GHz - 4,311,735,119 instructions:u # 1.61 insn per cycle - 1.717322776 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1845) (512y: 70) (512z:82918) +TOTAL : 1.707053 sec + 2,672,429,840 cycles:u # 1.561 GHz + 4,311,771,584 instructions:u # 1.61 insn per cycle + 1.715303358 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1847) (512y: 70) (512z:82924) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 15ea46db59..260e73d291 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2022-12-17_04:24:00 +DATE: 2022-12-18_08:35:37 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.005632e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.006087e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.006253e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.011154e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.011628e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.011782e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.527568 sec - 1,892,648,533 cycles:u # 0.671 GHz - 3,743,221,332 instructions:u # 1.98 insn per cycle - 3.101983860 seconds time elapsed +TOTAL : 2.493175 sec + 1,945,781,235 cycles:u # 0.689 GHz + 3,809,701,314 instructions:u # 1.96 insn per cycle + 2.886308353 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.255397e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.257550e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.257834e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.274882e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.277148e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.277415e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.035042 sec - 3,200,392,721 cycles:u # 0.732 GHz - 6,481,695,957 instructions:u # 2.03 insn per cycle - 4.430864758 seconds time elapsed +TOTAL : 4.029449 sec + 3,153,605,471 cycles:u # 0.721 GHz + 6,418,152,216 instructions:u # 2.04 insn per cycle + 4.428223735 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.350533e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.351013e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.351013e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.313071e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.313557e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.313557e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.185106 sec - 19,143,125,108 cycles:u # 2.663 GHz - 54,120,124,566 instructions:u # 2.83 insn per cycle - 7.299354216 seconds time elapsed +TOTAL : 7.231874 sec + 19,264,241,981 cycles:u # 2.666 GHz + 54,120,921,989 instructions:u # 2.81 insn per cycle + 7.239370104 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32248) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.431121e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.431304e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.431304e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.426326e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.426504e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.426504e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.691035 sec - 9,838,629,213 cycles:u # 2.662 GHz - 27,044,215,932 instructions:u # 2.75 insn per cycle - 3.806494638 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96233) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.703840 sec + 9,862,678,202 cycles:u # 2.660 GHz + 27,044,361,999 instructions:u # 2.74 insn per cycle + 3.710975150 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96239) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.807097e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.807869e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.807869e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.789409e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.790159e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.790159e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.883600 sec - 4,267,146,176 cycles:u # 2.259 GHz - 9,651,547,248 instructions:u # 2.26 insn per cycle - 1.995462133 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84136) (512y: 0) (512z: 0) +TOTAL : 1.894929 sec + 4,297,626,057 cycles:u # 2.263 GHz + 9,651,620,151 instructions:u # 2.25 insn per cycle + 1.902226809 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84144) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.152902e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.153853e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.153853e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.130815e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.131751e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.131751e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.676461 sec - 3,802,766,325 cycles:u # 2.262 GHz - 8,600,762,712 instructions:u # 2.26 insn per cycle - 1.870401648 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83693) (512y: 180) (512z: 0) +TOTAL : 1.688547 sec + 3,826,481,018 cycles:u # 2.260 GHz + 8,600,835,797 instructions:u # 2.25 insn per cycle + 1.696097842 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83701) (512y: 180) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.108176e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.109277e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.109277e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.080424e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.081484e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.081484e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.700935 sec - 2,670,182,485 cycles:u # 1.565 GHz - 4,317,780,505 instructions:u # 1.62 insn per cycle - 1.767044692 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1764) (512y: 166) (512z:83004) +TOTAL : 1.716703 sec + 2,684,606,516 cycles:u # 1.560 GHz + 4,317,817,247 instructions:u # 1.61 insn per cycle + 1.724550573 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1766) (512y: 166) (512z:83010) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index e4317cabef..b1ab3a2cb9 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2022-12-17_04:25:31 +DATE: 2022-12-18_08:37:06 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.912831e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.913767e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.914154e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.863680e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.864908e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.865333e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.748287 sec - 1,206,525,371 cycles:u # 0.619 GHz - 2,303,225,078 instructions:u # 1.91 insn per cycle - 2.223374413 seconds time elapsed +TOTAL : 1.676666 sec + 1,202,181,912 cycles:u # 0.612 GHz + 2,318,618,504 instructions:u # 1.93 insn per cycle + 2.023680905 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.313900e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.314652e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.314779e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.329019e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.329797e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.329918e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.964808 sec - 1,438,471,487 cycles:u # 0.638 GHz - 2,798,428,720 instructions:u # 1.95 insn per cycle - 2.313316714 seconds time elapsed +TOTAL : 1.953390 sec + 1,427,121,411 cycles:u # 0.636 GHz + 2,727,672,847 instructions:u # 1.91 insn per cycle + 2.302484179 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.776386e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.776817e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.776817e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.685495e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.685918e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.685918e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.791900 sec - 18,098,433,340 cycles:u # 2.663 GHz - 53,603,376,774 instructions:u # 2.96 insn per cycle - 6.953445240 seconds time elapsed +TOTAL : 6.882554 sec + 18,335,563,704 cycles:u # 2.666 GHz + 53,603,736,145 instructions:u # 2.92 insn per cycle + 6.889713491 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20316) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.161574e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.162303e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.162303e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.150147e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.150874e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.150874e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187012e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.672079 sec - 4,455,448,936 cycles:u # 2.657 GHz - 13,737,433,282 instructions:u # 3.08 insn per cycle - 1.734194582 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96909) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.678367 sec + 4,476,596,655 cycles:u # 2.661 GHz + 13,737,508,213 instructions:u # 3.07 insn per cycle + 1.685439837 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96915) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.634155e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.636630e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.636630e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.547617e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.550023e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.550023e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187176e-05 +- 9.826765e-06 ) GeV^-6 -TOTAL : 0.939331 sec - 2,129,021,494 cycles:u # 2.254 GHz - 4,846,771,822 instructions:u # 2.28 insn per cycle - 1.006791914 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84249) (512y: 0) (512z: 0) +TOTAL : 0.954069 sec + 2,163,828,885 cycles:u # 2.258 GHz + 4,846,809,142 instructions:u # 2.24 insn per cycle + 0.961350694 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84257) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.315511e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.318588e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.318588e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.281240e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.284193e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.284193e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187176e-05 +- 9.826765e-06 ) GeV^-6 -TOTAL : 0.838145 sec - 1,900,484,423 cycles:u # 2.254 GHz - 4,319,273,641 instructions:u # 2.27 insn per cycle - 0.900943991 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83912) (512y: 33) (512z: 0) +TOTAL : 0.843106 sec + 1,912,394,295 cycles:u # 2.256 GHz + 4,319,310,963 instructions:u # 2.26 insn per cycle + 0.850553162 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83920) (512y: 33) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.202086e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.205636e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.205636e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.193207e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.196767e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.196767e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187176e-05 +- 9.826765e-06 ) GeV^-6 -TOTAL : 0.853846 sec - 1,338,724,837 cycles:u # 1.560 GHz - 2,169,611,594 instructions:u # 1.62 insn per cycle - 0.961045945 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2144) (512y: 41) (512z:83038) +TOTAL : 0.855556 sec + 1,338,706,775 cycles:u # 1.556 GHz + 2,169,630,446 instructions:u # 1.62 insn per cycle + 0.863242114 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 41) (512z:83044) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 0cb8ceb02d..853f530be8 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2022-12-17_04:57:11 +DATE: 2022-12-18_09:08:47 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +47,14 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.645927e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.648317e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.648317e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.650709e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.653147e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.653147e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187093e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.676344 sec - 1,187,775,069 cycles:u # 0.605 GHz - 2,306,519,708 instructions:u # 1.94 insn per cycle - 2.023226157 seconds time elapsed +TOTAL : 1.677619 sec + 1,175,855,673 cycles:u # 0.598 GHz + 2,266,411,985 instructions:u # 1.93 insn per cycle + 2.023464512 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -66,14 +66,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.281968e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.299984e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.299984e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.284426e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.302358e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.302358e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856716e-04 +- 8.331923e-05 ) GeV^-6 -TOTAL : 1.961992 sec - 1,508,611,860 cycles:u # 0.671 GHz - 2,825,372,038 instructions:u # 1.87 insn per cycle - 2.307517644 seconds time elapsed +TOTAL : 1.966358 sec + 1,433,079,174 cycles:u # 0.632 GHz + 2,848,303,392 instructions:u # 1.99 insn per cycle + 2.325641729 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -88,14 +88,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.772431e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.772872e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.772872e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.655413e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.655821e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.655821e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.796456 sec - 18,105,604,683 cycles:u # 2.662 GHz - 53,604,121,862 instructions:u # 2.96 insn per cycle - 6.803714380 seconds time elapsed +TOTAL : 6.899400 sec + 18,397,015,524 cycles:u # 2.665 GHz + 53,604,481,213 instructions:u # 2.91 insn per cycle + 6.906809912 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20316) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe @@ -114,15 +114,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.155996e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.156752e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.156752e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.144935e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.145665e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.145665e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187012e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.675481 sec - 4,464,928,651 cycles:u # 2.658 GHz - 13,738,312,678 instructions:u # 3.08 insn per cycle - 1.682462835 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96909) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.681383 sec + 4,482,043,807 cycles:u # 2.660 GHz + 13,738,387,627 instructions:u # 3.07 insn per cycle + 1.688517385 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96915) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -140,15 +140,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.641235e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.643658e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.643658e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.547250e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.549592e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.549592e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187176e-05 +- 9.826765e-06 ) GeV^-6 -TOTAL : 0.938502 sec - 2,127,809,633 cycles:u # 2.257 GHz - 4,847,669,874 instructions:u # 2.28 insn per cycle - 0.945611948 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84249) (512y: 0) (512z: 0) +TOTAL : 0.954318 sec + 2,164,753,139 cycles:u # 2.258 GHz + 4,847,707,150 instructions:u # 2.24 insn per cycle + 0.961597428 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84257) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -166,15 +166,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.332130e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.335224e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.335224e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.304049e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.307009e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.307009e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187176e-05 +- 9.826765e-06 ) GeV^-6 -TOTAL : 0.836425 sec - 1,897,600,630 cycles:u # 2.257 GHz - 4,320,171,552 instructions:u # 2.28 insn per cycle - 0.843563039 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83912) (512y: 33) (512z: 0) +TOTAL : 0.840011 sec + 1,907,040,976 cycles:u # 2.258 GHz + 4,320,208,762 instructions:u # 2.27 insn per cycle + 0.847098380 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83920) (512y: 33) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -192,15 +192,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.203785e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.207348e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.207348e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.206385e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.210159e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.210159e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187176e-05 +- 9.826765e-06 ) GeV^-6 -TOTAL : 0.854143 sec - 1,337,550,325 cycles:u # 1.558 GHz - 2,170,525,121 instructions:u # 1.62 insn per cycle - 0.861391533 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2144) (512y: 41) (512z:83038) +TOTAL : 0.853620 sec + 1,338,033,901 cycles:u # 1.560 GHz + 2,170,544,131 instructions:u # 1.62 insn per cycle + 0.861057466 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 41) (512z:83044) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index fdbd704691..910f7f2eee 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -35,7 +35,7 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2022-12-17_04:26:38 +DATE: 2022-12-18_08:38:11 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -43,14 +43,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.792587e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.793559e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.793892e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.830274e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.831184e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.831551e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.682947 sec - 1,214,656,280 cycles:u # 0.615 GHz - 2,331,423,270 instructions:u # 1.92 insn per cycle - 2.033116568 seconds time elapsed +TOTAL : 1.679473 sec + 1,206,804,623 cycles:u # 0.613 GHz + 2,254,312,859 instructions:u # 1.87 insn per cycle + 2.026713402 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... @@ -58,14 +58,14 @@ runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.7.99 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.307370e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.308103e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.308212e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.348659e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.349466e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.349601e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.963960 sec - 1,474,961,991 cycles:u # 0.652 GHz - 2,885,038,324 instructions:u # 1.96 insn per cycle - 2.319945105 seconds time elapsed +TOTAL : 1.963224 sec + 1,443,974,245 cycles:u # 0.638 GHz + 2,791,629,349 instructions:u # 1.93 insn per cycle + 2.321560596 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -79,14 +79,14 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.814623e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.815087e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.815087e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.729912e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.730341e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.730341e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.758522 sec - 18,006,556,250 cycles:u # 2.663 GHz - 53,623,471,166 instructions:u # 2.98 insn per cycle - 6.765623668 seconds time elapsed +TOTAL : 6.840156 sec + 18,227,458,978 cycles:u # 2.666 GHz + 53,623,829,841 instructions:u # 2.94 insn per cycle + 6.847305953 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20530) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe @@ -104,15 +104,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.159592e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.160324e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.160324e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.150929e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.151640e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.151640e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187012e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.672896 sec - 4,457,609,437 cycles:u # 2.659 GHz - 13,730,500,380 instructions:u # 3.08 insn per cycle - 1.680120229 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96730) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.677648 sec + 4,475,316,914 cycles:u # 2.660 GHz + 13,730,574,657 instructions:u # 3.07 insn per cycle + 1.684966269 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96736) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -129,15 +129,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.530949e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.533330e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.533330e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.613562e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.616011e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.616011e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187176e-05 +- 9.826765e-06 ) GeV^-6 -TOTAL : 0.956612 sec - 2,169,366,771 cycles:u # 2.257 GHz - 4,853,177,294 instructions:u # 2.24 insn per cycle - 0.964098964 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84835) (512y: 0) (512z: 0) +TOTAL : 0.942575 sec + 2,136,764,028 cycles:u # 2.257 GHz + 4,853,214,650 instructions:u # 2.27 insn per cycle + 0.949740475 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84843) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -154,15 +154,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.303337e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.306380e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.306380e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.208218e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.211115e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.211115e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187176e-05 +- 9.826765e-06 ) GeV^-6 -TOTAL : 0.839598 sec - 1,903,234,307 cycles:u # 2.255 GHz - 4,325,313,664 instructions:u # 2.27 insn per cycle - 0.846866142 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84494) (512y: 22) (512z: 0) +TOTAL : 0.852519 sec + 1,932,119,176 cycles:u # 2.255 GHz + 4,325,350,809 instructions:u # 2.24 insn per cycle + 0.860038556 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84502) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -179,15 +179,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.171636e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.175197e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.175197e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.193324e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.196932e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.196932e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187176e-05 +- 9.826765e-06 ) GeV^-6 -TOTAL : 0.858014 sec - 1,343,516,832 cycles:u # 1.558 GHz - 2,176,466,085 instructions:u # 1.62 insn per cycle - 0.865148572 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2718) (512y: 23) (512z:83233) +TOTAL : 0.856309 sec + 1,342,395,401 cycles:u # 1.561 GHz + 2,176,484,700 instructions:u # 1.62 insn per cycle + 0.863755501 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2720) (512y: 23) (512z:83239) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests.