Function: setTemperature._omp_fn.0 | Module: exec | Source: initAtoms.c:151-162 [...] | Coverage: 0.01% |
---|
Function: setTemperature._omp_fn.0 | Module: exec | Source: initAtoms.c:151-162 [...] | Coverage: 0.01% |
---|
/home/eoseret/qaas_runs_CPU_9468/171-110-4860/intel/CoMD/build/CoMD/CoMD/src-openmp/random.c: 45 - 70 |
-------------------------------------------------------------------------------- |
45: *seed *= UINT64_C(437799614237992725); |
46: *seed %= UINT64_C(2305843009213693951); |
[...] |
68: uint32_t s2 = (id+callSite) * UINT32_C(2654435761); |
69: |
70: uint64_t iSeed = (UINT64_C(0x100000000) * s1) + s2; |
/home/eoseret/qaas_runs_CPU_9468/171-110-4860/intel/CoMD/build/CoMD/CoMD/src-openmp/initAtoms.c: 151 - 162 |
-------------------------------------------------------------------------------- |
151: #pragma omp parallel for |
152: for (int iBox=0; iBox<s->boxes->nLocalBoxes; ++iBox) |
153: { |
154: for (int iOff=MAXATOMS*iBox, ii=0; ii<s->boxes->nAtoms[iBox]; ++ii, ++iOff) |
155: { |
156: int iType = s->atoms->iSpecies[iOff]; |
157: real_t mass = s->species[iType].mass; |
158: real_t sigma = sqrt(kB_eV * temperature/mass); |
159: uint64_t seed = mkSeed(s->atoms->gid[iOff], 123); |
160: s->atoms->p[iOff][0] = mass * sigma * gasdev(&seed); |
161: s->atoms->p[iOff][1] = mass * sigma * gasdev(&seed); |
162: s->atoms->p[iOff][2] = mass * sigma * gasdev(&seed); |
0x40f780 PUSH %RBP |
0x40f781 MOV %RSP,%RBP |
0x40f784 PUSH %R15 |
0x40f786 PUSH %R14 |
0x40f788 PUSH %R13 |
0x40f78a MOV %RDI,%R13 |
0x40f78d PUSH %R12 |
0x40f78f PUSH %RBX |
0x40f790 SUB $0x68,%RSP |
0x40f794 MOV (%RDI),%RBX |
0x40f797 CALL 403060 <omp_get_num_threads@plt> |
0x40f79c MOV 0x18(%RBX),%R14 |
0x40f7a0 MOV %EAX,%R12D |
0x40f7a3 CALL 403150 <omp_get_thread_num@plt> |
0x40f7a8 MOV %EAX,%R10D |
0x40f7ab MOV 0xc(%R14),%EAX |
0x40f7af CLTD |
0x40f7b0 IDIV %R12D |
0x40f7b3 CMP %EDX,%R10D |
0x40f7b6 JL 40fabd |
0x40f7bc IMUL %EAX,%R10D |
0x40f7c0 ADD %EDX,%R10D |
0x40f7c3 LEA (%RAX,%R10,1),%R11D |
0x40f7c7 CMP %R11D,%R10D |
0x40f7ca JGE 40faae |
0x40f7d0 VMOVSD 0x34e0(%RIP),%XMM0 |
0x40f7d8 VMULSD 0x8(%R13),%XMM0,%XMM4 |
0x40f7de MOV %RBX,-0x88(%RBP) |
0x40f7e5 MOV 0x78(%R14),%RCX |
0x40f7e9 MOV %R11D,-0x7c(%RBP) |
0x40f7ed MOV $0x613606df9756715,%R14 |
0x40f7f7 MOV %RCX,-0x78(%RBP) |
0x40f7fb MOVSXD %R10D,%RCX |
0x40f7fe MOV %RCX,%R11 |
0x40f801 VMOVSD %XMM4,-0x68(%RBP) |
0x40f806 NOPW %CS:(%RAX,%RAX,1) |
(93) 0x40f810 MOV -0x78(%RBP),%RSI |
(93) 0x40f814 MOV %R11D,%EDI |
(93) 0x40f817 SAL $0x6,%EDI |
(93) 0x40f81a MOVSXD (%RSI,%R11,4),%R8 |
(93) 0x40f81e TEST %R8D,%R8D |
(93) 0x40f821 JLE 40faa1 |
(93) 0x40f827 MOV -0x88(%RBP),%R9 |
(93) 0x40f82e MOV %R11,%RAX |
(93) 0x40f831 MOV %R11,-0x70(%RBP) |
(93) 0x40f835 MOVSXD %EDI,%R10 |
(93) 0x40f838 SAL $0x6,%RAX |
(93) 0x40f83c MOV 0x20(%R9),%R13 |
(93) 0x40f840 ADD %RAX,%R8 |
(93) 0x40f843 MOV 0x28(%R9),%RBX |
(93) 0x40f847 SAL $0x2,%R8 |
(93) 0x40f84b MOV 0x10(%R13),%R15 |
(93) 0x40f84f MOV 0x8(%R13),%R12 |
(93) 0x40f853 MOV %R8,-0x50(%RBP) |
(93) 0x40f857 MOV %RBX,-0x60(%RBP) |
(93) 0x40f85b LEA (,%R10,4),%RBX |
(93) 0x40f863 MOV %R15,-0x58(%RBP) |
(93) 0x40f867 NOPW (%RAX,%RAX,1) |
(94) 0x40f870 MOV (%R12,%RBX,1),%EDI |
(94) 0x40f874 MOV -0x58(%RBP),%RDX |
(94) 0x40f878 MOV $0x9,%R9D |
(94) 0x40f87e MOV -0x60(%RBP),%RCX |
(94) 0x40f882 VMOVSD -0x68(%RBP),%XMM3 |
(94) 0x40f887 IMUL $-0x61c8864f,%EDI,%ESI |
(94) 0x40f88d ADD $0x7b,%EDI |
(94) 0x40f890 MOVSXD (%RDX,%RBX,1),%R11 |
(94) 0x40f894 IMUL $-0x61c8864f,%EDI,%R8D |
(94) 0x40f89b SAL $0x4,%R11 |
(94) 0x40f89f SAL $0x20,%RSI |
(94) 0x40f8a3 VMOVSD 0x8(%RCX,%R11,1),%XMM1 |
(94) 0x40f8aa ADD %R8,%RSI |
(94) 0x40f8ad IMUL %R14,%RSI |
(94) 0x40f8b1 VDIVSD %XMM1,%XMM3,%XMM2 |
(94) 0x40f8b5 MOV %RSI,%RAX |
(94) 0x40f8b8 MOV %RSI,%R15 |
(94) 0x40f8bb MUL %R9 |
(94) 0x40f8be SUB %RDX,%R15 |
(94) 0x40f8c1 SHR $0x1,%R15 |
(94) 0x40f8c4 ADD %R15,%RDX |
(94) 0x40f8c7 SHR $0x3c,%RDX |
(94) 0x40f8cb MOV %RDX,%R10 |
(94) 0x40f8ce SAL $0x3d,%R10 |
(94) 0x40f8d2 SUB %RDX,%R10 |
(94) 0x40f8d5 SUB %R10,%RSI |
(94) 0x40f8d8 IMUL %R14,%RSI |
(94) 0x40f8dc MOV %RSI,%RAX |
(94) 0x40f8df MOV %RSI,%R11 |
(94) 0x40f8e2 MUL %R9 |
(94) 0x40f8e5 SUB %RDX,%R11 |
(94) 0x40f8e8 SHR $0x1,%R11 |
(94) 0x40f8eb ADD %R11,%RDX |
(94) 0x40f8ee SHR $0x3c,%RDX |
(94) 0x40f8f2 MOV %RDX,%RCX |
(94) 0x40f8f5 SAL $0x3d,%RCX |
(94) 0x40f8f9 SUB %RDX,%RCX |
(94) 0x40f8fc VSQRTSD %XMM2,%XMM2,%XMM2 |
(94) 0x40f900 SUB %RCX,%RSI |
(94) 0x40f903 IMUL %R14,%RSI |
(94) 0x40f907 VMULSD %XMM2,%XMM1,%XMM5 |
(94) 0x40f90b MOV %RSI,%RAX |
(94) 0x40f90e MOV %RSI,%RDI |
(94) 0x40f911 MUL %R9 |
(94) 0x40f914 VMOVSD %XMM5,-0x48(%RBP) |
(94) 0x40f919 SUB %RDX,%RDI |
(94) 0x40f91c SHR $0x1,%RDI |
(94) 0x40f91f ADD %RDI,%RDX |
(94) 0x40f922 SHR $0x3c,%RDX |
(94) 0x40f926 MOV %RDX,%R8 |
(94) 0x40f929 SAL $0x3d,%R8 |
(94) 0x40f92d SUB %RDX,%R8 |
(94) 0x40f930 SUB %R8,%RSI |
(94) 0x40f933 IMUL %R14,%RSI |
(94) 0x40f937 MOV %RSI,%RAX |
(94) 0x40f93a MOV %RSI,%R15 |
(94) 0x40f93d MUL %R9 |
(94) 0x40f940 SUB %RDX,%R15 |
(94) 0x40f943 SHR $0x1,%R15 |
(94) 0x40f946 ADD %R15,%RDX |
(94) 0x40f949 SHR $0x3c,%RDX |
(94) 0x40f94d MOV %RDX,%R10 |
(94) 0x40f950 SAL $0x3d,%R10 |
(94) 0x40f954 SUB %RDX,%R10 |
(94) 0x40f957 SUB %R10,%RSI |
(94) 0x40f95a IMUL %R14,%RSI |
(94) 0x40f95e MOV %RSI,%RAX |
(94) 0x40f961 MOV %RSI,%R11 |
(94) 0x40f964 MUL %R9 |
(94) 0x40f967 SUB %RDX,%R11 |
(94) 0x40f96a SHR $0x1,%R11 |
(94) 0x40f96d ADD %R11,%RDX |
(94) 0x40f970 SHR $0x3c,%RDX |
(94) 0x40f974 MOV %RDX,%RCX |
(94) 0x40f977 SAL $0x3d,%RCX |
(94) 0x40f97b SUB %RDX,%RCX |
(94) 0x40f97e SUB %RCX,%RSI |
(94) 0x40f981 IMUL %R14,%RSI |
(94) 0x40f985 MOV %RSI,%RAX |
(94) 0x40f988 MOV %RSI,%RDI |
(94) 0x40f98b MUL %R9 |
(94) 0x40f98e SUB %RDX,%RDI |
(94) 0x40f991 SHR $0x1,%RDI |
(94) 0x40f994 ADD %RDI,%RDX |
(94) 0x40f997 SHR $0x3c,%RDX |
(94) 0x40f99b MOV %RDX,%R8 |
(94) 0x40f99e SAL $0x3d,%R8 |
(94) 0x40f9a2 SUB %RDX,%R8 |
(94) 0x40f9a5 SUB %R8,%RSI |
(94) 0x40f9a8 IMUL %R14,%RSI |
(94) 0x40f9ac MOV %RSI,%RAX |
(94) 0x40f9af MOV %RSI,%R15 |
(94) 0x40f9b2 MUL %R9 |
(94) 0x40f9b5 SUB %RDX,%R15 |
(94) 0x40f9b8 SHR $0x1,%R15 |
(94) 0x40f9bb ADD %R15,%RDX |
(94) 0x40f9be SHR $0x3c,%RDX |
(94) 0x40f9c2 MOV %RDX,%R10 |
(94) 0x40f9c5 SAL $0x3d,%R10 |
(94) 0x40f9c9 SUB %RDX,%R10 |
(94) 0x40f9cc SUB %R10,%RSI |
(94) 0x40f9cf IMUL %R14,%RSI |
(94) 0x40f9d3 MOV %RSI,%RAX |
(94) 0x40f9d6 MOV %RSI,%R11 |
(94) 0x40f9d9 MUL %R9 |
(94) 0x40f9dc SUB %RDX,%R11 |
(94) 0x40f9df SHR $0x1,%R11 |
(94) 0x40f9e2 ADD %R11,%RDX |
(94) 0x40f9e5 SHR $0x3c,%RDX |
(94) 0x40f9e9 MOV %RDX,%RCX |
(94) 0x40f9ec SAL $0x3d,%RCX |
(94) 0x40f9f0 SUB %RDX,%RCX |
(94) 0x40f9f3 SUB %RCX,%RSI |
(94) 0x40f9f6 IMUL %R14,%RSI |
(94) 0x40f9fa MOV %RSI,%RAX |
(94) 0x40f9fd MOV %RSI,%RDI |
(94) 0x40fa00 MUL %R9 |
(94) 0x40fa03 SUB %RDX,%RDI |
(94) 0x40fa06 SHR $0x1,%RDI |
(94) 0x40fa09 ADD %RDI,%RDX |
(94) 0x40fa0c LEA -0x38(%RBP),%RDI |
(94) 0x40fa10 SHR $0x3c,%RDX |
(94) 0x40fa14 MOV %RDX,%R8 |
(94) 0x40fa17 SAL $0x3d,%R8 |
(94) 0x40fa1b SUB %RDX,%R8 |
(94) 0x40fa1e SUB %R8,%RSI |
(94) 0x40fa21 IMUL %R14,%RSI |
(94) 0x40fa25 MOV %RSI,%RAX |
(94) 0x40fa28 MUL %R9 |
(94) 0x40fa2b MOV %RSI,%R9 |
(94) 0x40fa2e SUB %RDX,%R9 |
(94) 0x40fa31 SHR $0x1,%R9 |
(94) 0x40fa34 ADD %R9,%RDX |
(94) 0x40fa37 SHR $0x3c,%RDX |
(94) 0x40fa3b MOV %RDX,%R15 |
(94) 0x40fa3e SAL $0x3d,%R15 |
(94) 0x40fa42 SUB %RDX,%R15 |
(94) 0x40fa45 SUB %R15,%RSI |
(94) 0x40fa48 MOV %RSI,-0x38(%RBP) |
(94) 0x40fa4c CALL 40f690 <gasdev> |
(94) 0x40fa51 VMULSD -0x48(%RBP),%XMM0,%XMM6 |
(94) 0x40fa56 MOV 0x20(%R13),%R10 |
(94) 0x40fa5a LEA (%RBX,%RBX,2),%RSI |
(94) 0x40fa5e LEA -0x38(%RBP),%RDI |
(94) 0x40fa62 ADD $0x4,%RBX |
(94) 0x40fa66 LEA (%R10,%RSI,2),%R15 |
(94) 0x40fa6a VMOVSD %XMM6,(%R15) |
(94) 0x40fa6f CALL 40f690 <gasdev> |
(94) 0x40fa74 VMULSD -0x48(%RBP),%XMM0,%XMM7 |
(94) 0x40fa79 LEA -0x38(%RBP),%RDI |
(94) 0x40fa7d VMOVSD %XMM7,0x8(%R15) |
(94) 0x40fa83 CALL 40f690 <gasdev> |
(94) 0x40fa88 VMULSD -0x48(%RBP),%XMM0,%XMM8 |
(94) 0x40fa8d VMOVSD %XMM8,0x10(%R15) |
(94) 0x40fa93 CMP %RBX,-0x50(%RBP) |
(94) 0x40fa97 JNE 40f870 |
(93) 0x40fa9d MOV -0x70(%RBP),%R11 |
(93) 0x40faa1 INC %R11 |
(93) 0x40faa4 CMP %R11D,-0x7c(%RBP) |
(93) 0x40faa8 JG 40f810 |
0x40faae ADD $0x68,%RSP |
0x40fab2 POP %RBX |
0x40fab3 POP %R12 |
0x40fab5 POP %R13 |
0x40fab7 POP %R14 |
0x40fab9 POP %R15 |
0x40fabb POP %RBP |
0x40fabc RET |
0x40fabd INC %EAX |
0x40fabf XOR %EDX,%EDX |
0x40fac1 JMP 40f7bc |
0x40fac6 NOPW %CS:(%RAX,%RAX,1) |
Path / |
Source file and lines | initAtoms.c:151-162 |
Module | exec |
nb instructions | 48 |
nb uops | 53 |
loop length | 178 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 8.83 cycles |
front end | 8.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.10 | 4.00 | 4.33 | 4.33 | 6.00 | 3.07 | 2.90 | 6.00 | 6.00 | 6.00 | 2.93 | 4.33 |
cycles | 3.10 | 5.53 | 4.33 | 4.33 | 6.00 | 3.07 | 2.90 | 6.00 | 6.00 | 6.00 | 2.93 | 4.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 8.53-8.57 |
Stall cycles | 0.00 |
Front-end | 8.83 |
Dispatch | 6.00 |
DIV/SQRT | 6.00 |
Overall L1 | 8.83 |
Vectorization ratios - INT |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
Vectorization ratios - FP |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
Vectorization ratios - INT+FP |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
Vector efficiency ratios - INT |
all | 9% |
load | 12% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
Vector efficiency ratios - FP |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
Vector efficiency ratios - INT+FP |
all | 9% |
load | 12% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403060 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403150 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 40fabd <setTemperature._omp_fn.0+0x33d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%R10D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R10,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R11D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40faae <setTemperature._omp_fn.0+0x32e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x34e0(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0x8(%R13),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV %RBX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11D,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x613606df9756715,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R10D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RCX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM4,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40f7bc <setTemperature._omp_fn.0+0x3c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | initAtoms.c:151-162 |
Module | exec |
nb instructions | 48 |
nb uops | 53 |
loop length | 178 |
used x86 registers | 13 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 4 |
micro-operation queue | 8.83 cycles |
front end | 8.83 cycles |
P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.10 | 4.00 | 4.33 | 4.33 | 6.00 | 3.07 | 2.90 | 6.00 | 6.00 | 6.00 | 2.93 | 4.33 |
cycles | 3.10 | 5.53 | 4.33 | 4.33 | 6.00 | 3.07 | 2.90 | 6.00 | 6.00 | 6.00 | 2.93 | 4.33 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 8.53-8.57 |
Stall cycles | 0.00 |
Front-end | 8.83 |
Dispatch | 6.00 |
DIV/SQRT | 6.00 |
Overall L1 | 8.83 |
Vectorization ratios - INT |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
Vectorization ratios - FP |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
Vectorization ratios - INT+FP |
all | 0% |
load | 0% |
store | 0% |
mul | 0% |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
Vector efficiency ratios - INT |
all | 9% |
load | 12% |
store | 10% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
Vector efficiency ratios - FP |
all | 12% |
load | 12% |
store | 12% |
mul | 12% |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
Vector efficiency ratios - INT+FP |
all | 9% |
load | 12% |
store | 10% |
mul | 12% |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 8% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R15 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
SUB $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV (%RDI),%RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CALL 403060 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV 0x18(%RBX),%R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %EAX,%R12D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403150 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R10D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0xc(%R14),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %R12D | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 40fabd <setTemperature._omp_fn.0+0x33d> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%R10D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R10,1),%R11D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %R11D,%R10D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40faae <setTemperature._omp_fn.0+0x32e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x34e0(%RIP),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VMULSD 0x8(%R13),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
MOV %RBX,-0x88(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV 0x78(%R14),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV %R11D,-0x7c(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOV $0x613606df9756715,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
MOV %RCX,-0x78(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
MOVSXD %R10D,%RCX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
MOV %RCX,%R11 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM4,-0x68(%RBP) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
ADD $0x68,%RSP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40f7bc <setTemperature._omp_fn.0+0x3c> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼setTemperature._omp_fn.0– | 0.01 | 0 |
▼Loop 93 - initAtoms.c:154-162 - exec– | 0 | 0 |
○Loop 94 - initAtoms.c:154-162 - exec | 0.01 | 0 |