Function: clearRVecs(gmx::ArrayRef<gmx::BasicVector<float> >, bool) [clone ._omp_fn.0] | Module: libgromacs_mpi.so.7 | Source: sim_util.cpp:473-474 [...] | Coverage: 0.05% |
---|
Function: clearRVecs(gmx::ArrayRef<gmx::BasicVector<float> >, bool) [clone ._omp_fn.0] | Module: libgromacs_mpi.so.7 | Source: sim_util.cpp:473-474 [...] | Coverage: 0.05% |
---|
/home/eoseret/GROMACS/gromacs-2022.4/api/legacy/include/gromacs/math/vec.h: 292 - 294 |
-------------------------------------------------------------------------------- |
292: a[XX] = 0.0_real; |
293: a[YY] = 0.0_real; |
294: a[ZZ] = 0.0_real; |
/home/eoseret/GROMACS/gromacs-2022.4/src/gromacs/mdlib/sim_util.cpp: 473 - 474 |
-------------------------------------------------------------------------------- |
473: #pragma omp parallel for num_threads(nth) schedule(static) |
474: for (gmx::index i = 0; i < v.ssize(); i++) |
/home/eoseret/GROMACS/gromacs-2022.4/api/legacy/include/gromacs/utility/arrayref.h: 85 - 85 |
-------------------------------------------------------------------------------- |
85: constexpr auto operator-(ArrayRefIter other) const noexcept { return it_ - other.it_; } |
0x97de00 STP X29, X30, [SP, #976]! |
0x97de04 ORR X1, XZR, #3841 |
0x97de08 MOVK X1, #43691 |
0x97de0c ADD X29, SP, #0 |
0x97de10 LDR X0, [X0] |
0x97de14 STP X19, X20, [SP, #16] |
0x97de18 LDR X19, [X0, #8] |
0x97de1c STR X21, [SP, #32] |
0x97de20 LDR X21, [X0] |
0x97de24 SUB X2, X19, X21 |
0x97de28 SBFM X3, X2, #2, #63 |
0x97de2c MADD X19, X3, X1, XZR |
0x97de30 BL 199830 |
0x97de34 SBFM X20, X0, #0, #31 |
0x97de38 BL 18ded0 |
0x97de3c SBFM X4, X0, #0, #31 |
0x97de40 SDIV X5, X19, X20 |
0x97de44 MSUB X6, X5, X20, X19 |
0x97de48 CMP X4, X6 |
0x97de4c B.LT 97dfd0 |
(17753) 0x97de50 MADD X7, X5, X4, X6 |
(17753) 0x97de54 ADD X8, X5, X7 |
(17753) 0x97de58 CMP X7, X8 |
(17753) 0x97de5c B.GE 97dfc0 |
(17753) 0x97de60 ADD X12, X5, X5,LSL #1 |
(17753) 0x97de64 ADD X9, X7, X7,LSL #1 |
(17753) 0x97de68 CMP X12, #16 |
(17753) 0x97de6c MOVZ X10, #16 |
(17753) 0x97de70 CSEL X11, X12, X10, #2 |
(17753) 0x97de74 MOVZ X14, #8 |
(17753) 0x97de78 SUB X13, X11, #16 |
(17753) 0x97de7c ADD X0, X21, X9,LSL #2 |
(17753) 0x97de80 WHILELO P6.S, XZR, X13 |
(17753) 0x97de84 MOVZ X2, #0 |
(17753) 0x97de88 CMP X12, X14 |
(17753) 0x97de8c DUP Z0.S, #0 |
(17753) 0x97de90 CSEL X15, X12, X14, #2 |
(17753) 0x97de94 SUB X16, X15, #8 |
(17753) 0x97de98 WHILELO P7.S, XZR, X12 |
(17753) 0x97de9c WHILELO P0.S, XZR, X16 |
(17754) 0x97dea0 ADD X17, X2, #24 |
(17754) 0x97dea4 WHILELO P3.S, X17, X13 |
(17754) 0x97dea8 WHILELO P4.S, X17, X16 |
(17754) 0x97deac ST1W {Z0.S}, P7, [X0, MUL VL] |
(17754) 0x97deb0 ST1W {Z0.S}, P0, [X0, #1, MUL VL] |
(17754) 0x97deb4 ST1W {Z0.S}, P6, [X0, #2, MUL VL] |
(17754) 0x97deb8 ADD X18, X2, #48 |
(17754) 0x97debc ADD X30, X2, #72 |
(17754) 0x97dec0 ADD X1, X2, #96 |
(17754) 0x97dec4 ADD X21, X2, #120 |
(17754) 0x97dec8 ADD X3, X2, #144 |
(17754) 0x97decc ADD X19, X0, #96 |
(17754) 0x97ded0 WHILELO P0.S, X17, X12 |
(17754) 0x97ded4 B.EQ 97dfc0 |
(17754) 0x97ded8 WHILELO P1.S, X18, X13 |
(17754) 0x97dedc WHILELO P2.S, X18, X16 |
(17754) 0x97dee0 ST1W {Z0.S}, P0, [X0, #3, MUL VL] |
(17754) 0x97dee4 ST1W {Z0.S}, P4, [X19, #1, MUL VL] |
(17754) 0x97dee8 ST1W {Z0.S}, P3, [X19, #2, MUL VL] |
(17754) 0x97deec ADD X20, X2, #168 |
(17754) 0x97def0 ADD X4, X0, #192 |
(17754) 0x97def4 WHILELO P5.S, X18, X12 |
(17754) 0x97def8 B.EQ 97dfc0 |
(17754) 0x97defc WHILELO P6.S, X30, X13 |
(17754) 0x97df00 WHILELO P7.S, X30, X16 |
(17754) 0x97df04 ST1W {Z0.S}, P5, [X0, #6, MUL VL] |
(17754) 0x97df08 ST1W {Z0.S}, P2, [X4, #1, MUL VL] |
(17754) 0x97df0c ST1W {Z0.S}, P1, [X4, #2, MUL VL] |
(17754) 0x97df10 ADD X2, X2, #192 |
(17754) 0x97df14 ADD X5, X0, #288 |
(17754) 0x97df18 WHILELO P3.S, X30, X12 |
(17754) 0x97df1c B.EQ 97dfc0 |
(17754) 0x97df20 WHILELO P1.S, X1, X13 |
(17754) 0x97df24 WHILELO P2.S, X1, X16 |
(17754) 0x97df28 ST1W {Z0.S}, P3, [X5, MUL VL] |
(17754) 0x97df2c ST1W {Z0.S}, P7, [X5, #1, MUL VL] |
(17754) 0x97df30 ST1W {Z0.S}, P6, [X5, #2, MUL VL] |
(17754) 0x97df34 ADD X6, X0, #384 |
(17754) 0x97df38 WHILELO P0.S, X1, X12 |
(17754) 0x97df3c B.EQ 97dfc0 |
(17754) 0x97df40 WHILELO P5.S, X21, X13 |
(17754) 0x97df44 WHILELO P4.S, X21, X16 |
(17754) 0x97df48 ST1W {Z0.S}, P0, [X6, MUL VL] |
(17754) 0x97df4c ST1W {Z0.S}, P2, [X6, #1, MUL VL] |
(17754) 0x97df50 ST1W {Z0.S}, P1, [X6, #2, MUL VL] |
(17754) 0x97df54 ADD X7, X0, #480 |
(17754) 0x97df58 WHILELO P6.S, X21, X12 |
(17754) 0x97df5c B.EQ 97dfc0 |
(17754) 0x97df60 WHILELO P7.S, X3, X13 |
(17754) 0x97df64 WHILELO P1.S, X3, X16 |
(17754) 0x97df68 ST1W {Z0.S}, P6, [X7, MUL VL] |
(17754) 0x97df6c ST1W {Z0.S}, P4, [X7, #1, MUL VL] |
(17754) 0x97df70 ST1W {Z0.S}, P5, [X7, #2, MUL VL] |
(17754) 0x97df74 ADD X8, X0, #576 |
(17754) 0x97df78 WHILELO P2.S, X3, X12 |
(17754) 0x97df7c B.EQ 97dfc0 |
(17754) 0x97df80 WHILELO P3.S, X20, X13 |
(17754) 0x97df84 WHILELO P5.S, X20, X16 |
(17754) 0x97df88 ST1W {Z0.S}, P2, [X8, MUL VL] |
(17754) 0x97df8c ST1W {Z0.S}, P1, [X8, #1, MUL VL] |
(17754) 0x97df90 ST1W {Z0.S}, P7, [X8, #2, MUL VL] |
(17754) 0x97df94 ADD X9, X0, #672 |
(17754) 0x97df98 WHILELO P4.S, X20, X12 |
(17754) 0x97df9c B.EQ 97dfc0 |
(17754) 0x97dfa0 WHILELO P6.S, X2, X13 |
(17754) 0x97dfa4 WHILELO P0.S, X2, X16 |
(17754) 0x97dfa8 ST1W {Z0.S}, P4, [X9, MUL VL] |
(17754) 0x97dfac ST1W {Z0.S}, P5, [X9, #1, MUL VL] |
(17754) 0x97dfb0 ST1W {Z0.S}, P3, [X9, #2, MUL VL] |
(17754) 0x97dfb4 ADD X0, X0, #768 |
(17754) 0x97dfb8 WHILELO P7.S, X2, X12 |
(17754) 0x97dfbc B.NE 97dea0 |
(17753) 0x97dfc0 LDP X19, X20, [SP, #16] |
(17753) 0x97dfc4 LDR X21, [SP, #32] |
(17753) 0x97dfc8 LDP X29, X30, [SP], #48 |
(17753) 0x97dfcc RET |
(17753) 0x97dfd0 ADD X5, X5, #1 |
(17753) 0x97dfd4 MOVZ X6, #0 |
(17753) 0x97dfd8 B 97de50 |
0x97dfdc HINT #0 |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
►96.19+ | __kmp_GOMP_microtask_wrapper(i[...] | libomp.so | |
○ | __kmp_invoke_microtask | libomp.so | |
○2.86 | __kmp_invoke_microtask | libomp.so |
Path / |
Source file and lines | sim_util.cpp:473-474 |
Module | libgromacs_mpi.so.7 |
nb instructions | 21 |
loop length | 84 |
nb stack references | 0 |
front end | 2.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 3.50 | 3.50 | 3.50 | 3.50 | 0.00 | 0.00 | 0.00 | 0.00 | 2.17 | 1.83 | 2.00 | 1.50 | 1.50 |
cycles | 1.50 | 1.50 | 3.50 | 3.50 | 3.50 | 3.50 | 0.00 | 0.00 | 0.00 | 0.00 | 2.17 | 1.83 | 2.00 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 2.50 |
Overall L1 | 9.08 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #976]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X1, XZR, #3841 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK X1, #43691 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X0, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR X19, [X0, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR X21, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB X2, X19, X21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SBFM X3, X2, #2, #63 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD X19, X3, X1, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
BL 199830 <@plt_start@+0xe910> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SBFM X20, X0, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 18ded0 <@plt_start@+0x2fb0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SBFM X4, X0, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SDIV X5, X19, X20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-20 | 1-0.50 |
MSUB X6, X5, X20, X19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP X4, X6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LT 97dfd0 <_ZL10clearRVecsN3gmx8ArrayRefINS_11BasicVectorIfEEEEb._omp_fn.0+0x1d0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 |
Source file and lines | sim_util.cpp:473-474 |
Module | libgromacs_mpi.so.7 |
nb instructions | 21 |
loop length | 84 |
nb stack references | 0 |
front end | 2.50 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 1.50 | 1.50 | 3.50 | 3.50 | 3.50 | 3.50 | 0.00 | 0.00 | 0.00 | 0.00 | 2.17 | 1.83 | 2.00 | 1.50 | 1.50 |
cycles | 1.50 | 1.50 | 3.50 | 3.50 | 3.50 | 3.50 | 0.00 | 0.00 | 0.00 | 0.00 | 2.17 | 1.83 | 2.00 | 1.50 | 1.50 |
Cycles executing div or sqrt instructions | 1.00-0.50 |
Front-end | 2.50 |
Overall L1 | 9.08 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | 0% |
other | 0% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | P12 | P13 | P14 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
STP X29, X30, [SP, #976]! | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
ORR X1, XZR, #3841 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MOVK X1, #43691 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
ADD X29, SP, #0 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
LDR X0, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STP X19, X20, [SP, #16] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR X19, [X0, #8] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
STR X21, [SP, #32] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0.50 | 0 | 0.50 | 0.50 | 1 | 0.50 |
LDR X21, [X0] | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0.33 | 0.33 | 0 | 0 | 4 | 0.33 |
SUB X2, X19, X21 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SBFM X3, X2, #2, #63 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
MADD X19, X3, X1, XZR | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
BL 199830 <@plt_start@+0xe910> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SBFM X20, X0, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
BL 18ded0 <@plt_start@+0x2fb0> | 1 | 0.50 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
SBFM X4, X0, #0, #31 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.25 |
SDIV X5, X19, X20 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5-20 | 1-0.50 |
MSUB X6, X5, X20, X19 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 |
CMP X4, X6 | 1 | 0 | 0 | 0.25 | 0.25 | 0.25 | 0.25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
B.LT 97dfd0 <_ZL10clearRVecsN3gmx8ArrayRefINS_11BasicVectorIfEEEEb._omp_fn.0+0x1d0> | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
HINT #0 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼clearRVecs(gmx::ArrayRef | 0.05 | 0.02 |
▼Loop 17753 - arrayref.h:85-85 - libgromacs_mpi.so.7– | 0 | 0 |
○Loop 17754 - vec.h:292-294 - libgromacs_mpi.so.7 | 0.04 | 0.02 |