Function: advancePosition._omp_fn.0 | Module: exec | Source: timestep.c:85-94 | Coverage: 4.3% |
---|
Function: advancePosition._omp_fn.0 | Module: exec | Source: timestep.c:85-94 | Coverage: 4.3% |
---|
/scratch_na/users/xoserete/qaas_runs/171-416-1926/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 85 - 94 |
-------------------------------------------------------------------------------- |
85: #pragma omp parallel for |
86: for (int iBox=0; iBox<nBoxes; iBox++) |
87: { |
88: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
89: { |
90: int iSpecies = s->atoms->iSpecies[iOff]; |
91: real_t invMass = 1.0/s->species[iSpecies].mass; |
92: s->atoms->r[iOff][0] += dt*s->atoms->p[iOff][0]*invMass; |
93: s->atoms->r[iOff][1] += dt*s->atoms->p[iOff][1]*invMass; |
94: s->atoms->r[iOff][2] += dt*s->atoms->p[iOff][2]*invMass; |
0x40e810 PUSH %RBP |
0x40e811 MOV %RSP,%RBP |
0x40e814 PUSH %R14 |
0x40e816 PUSH %R13 |
0x40e818 PUSH %R12 |
0x40e81a MOV %RDI,%R12 |
0x40e81d PUSH %RBX |
0x40e81e CALL 403070 <omp_get_num_threads@plt> |
0x40e823 MOV %EAX,%EBX |
0x40e825 CALL 403160 <omp_get_thread_num@plt> |
0x40e82a MOV %EAX,%R9D |
0x40e82d MOV 0x10(%R12),%EAX |
0x40e832 CLTD |
0x40e833 IDIV %EBX |
0x40e835 CMP %EDX,%R9D |
0x40e838 JL 40eafc |
0x40e83e IMUL %EAX,%R9D |
0x40e842 ADD %EDX,%R9D |
0x40e845 LEA (%RAX,%R9,1),%EBX |
0x40e849 CMP %EBX,%R9D |
0x40e84c JGE 40eaf3 |
0x40e852 VMOVSD 0x8(%R12),%XMM0 |
0x40e859 MOV (%R12),%R12 |
0x40e85d MOVSXD %R9D,%R10 |
0x40e860 SAL $0x6,%R9D |
0x40e864 LEA (%R10,%R10,2),%RAX |
0x40e868 VMOVSD 0x1ec8(%RIP),%XMM2 |
0x40e870 MOV 0x18(%R12),%RCX |
0x40e875 SAL $0x9,%RAX |
0x40e879 MOV 0x78(%RCX),%R13 |
0x40e87d NOPL (%RAX) |
(91) 0x40e880 MOVSXD (%R13,%R10,4),%R8 |
(91) 0x40e885 TEST %R8D,%R8D |
(91) 0x40e888 JLE 40eadd |
(91) 0x40e88e MOV 0x20(%R12),%R11 |
(91) 0x40e893 MOVSXD %R9D,%RSI |
(91) 0x40e896 MOV 0x28(%R12),%RDI |
(91) 0x40e89b MOV 0x10(%R11),%R14 |
(91) 0x40e89f MOV 0x18(%R11),%RDX |
(91) 0x40e8a3 MOV 0x20(%R11),%RCX |
(91) 0x40e8a7 MOV %R10,%R11 |
(91) 0x40e8aa SAL $0x6,%R11 |
(91) 0x40e8ae LEA (%R14,%RSI,4),%RSI |
(91) 0x40e8b2 ADD %RAX,%RDX |
(91) 0x40e8b5 ADD %R8,%R11 |
(91) 0x40e8b8 ADD %RAX,%RCX |
(91) 0x40e8bb LEA (%R14,%R11,4),%R11 |
(91) 0x40e8bf MOV %R11,%R8 |
(91) 0x40e8c2 SUB %RSI,%R8 |
(91) 0x40e8c5 SUB $0x4,%R8 |
(91) 0x40e8c9 SHR $0x2,%R8 |
(91) 0x40e8cd INC %R8 |
(91) 0x40e8d0 AND $0x3,%R8D |
(91) 0x40e8d4 JE 40e9ce |
(91) 0x40e8da CMP $0x1,%R8 |
(91) 0x40e8de JE 40e97c |
(91) 0x40e8e4 CMP $0x2,%R8 |
(91) 0x40e8e8 JE 40e933 |
(91) 0x40e8ea MOVSXD (%RSI),%R14 |
(91) 0x40e8ed VMULSD (%RCX),%XMM0,%XMM3 |
(91) 0x40e8f1 ADD $0x4,%RSI |
(91) 0x40e8f5 ADD $0x18,%RDX |
(91) 0x40e8f9 ADD $0x18,%RCX |
(91) 0x40e8fd SAL $0x4,%R14 |
(91) 0x40e901 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM1 |
(91) 0x40e908 VFMADD213SD -0x18(%RDX),%XMM1,%XMM3 |
(91) 0x40e90e VMOVSD %XMM3,-0x18(%RDX) |
(91) 0x40e913 VMULSD -0x10(%RCX),%XMM0,%XMM4 |
(91) 0x40e918 VFMADD213SD -0x10(%RDX),%XMM1,%XMM4 |
(91) 0x40e91e VMOVSD %XMM4,-0x10(%RDX) |
(91) 0x40e923 VMULSD -0x8(%RCX),%XMM0,%XMM5 |
(91) 0x40e928 VFMADD213SD -0x8(%RDX),%XMM5,%XMM1 |
(91) 0x40e92e VMOVSD %XMM1,-0x8(%RDX) |
(91) 0x40e933 MOVSXD (%RSI),%R8 |
(91) 0x40e936 VMULSD (%RCX),%XMM0,%XMM7 |
(91) 0x40e93a ADD $0x4,%RSI |
(91) 0x40e93e ADD $0x18,%RDX |
(91) 0x40e942 ADD $0x18,%RCX |
(91) 0x40e946 SAL $0x4,%R8 |
(91) 0x40e94a VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM6 |
(91) 0x40e951 VFMADD213SD -0x18(%RDX),%XMM6,%XMM7 |
(91) 0x40e957 VMOVSD %XMM7,-0x18(%RDX) |
(91) 0x40e95c VMULSD -0x10(%RCX),%XMM0,%XMM8 |
(91) 0x40e961 VFMADD213SD -0x10(%RDX),%XMM6,%XMM8 |
(91) 0x40e967 VMOVSD %XMM8,-0x10(%RDX) |
(91) 0x40e96c VMULSD -0x8(%RCX),%XMM0,%XMM9 |
(91) 0x40e971 VFMADD213SD -0x8(%RDX),%XMM9,%XMM6 |
(91) 0x40e977 VMOVSD %XMM6,-0x8(%RDX) |
(91) 0x40e97c MOVSXD (%RSI),%R14 |
(91) 0x40e97f VMULSD (%RCX),%XMM0,%XMM11 |
(91) 0x40e983 ADD $0x4,%RSI |
(91) 0x40e987 ADD $0x18,%RDX |
(91) 0x40e98b ADD $0x18,%RCX |
(91) 0x40e98f SAL $0x4,%R14 |
(91) 0x40e993 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM10 |
(91) 0x40e99a VFMADD213SD -0x18(%RDX),%XMM10,%XMM11 |
(91) 0x40e9a0 VMOVSD %XMM11,-0x18(%RDX) |
(91) 0x40e9a5 VMULSD -0x10(%RCX),%XMM0,%XMM12 |
(91) 0x40e9aa VFMADD213SD -0x10(%RDX),%XMM10,%XMM12 |
(91) 0x40e9b0 VMOVSD %XMM12,-0x10(%RDX) |
(91) 0x40e9b5 VMULSD -0x8(%RCX),%XMM0,%XMM13 |
(91) 0x40e9ba VFMADD213SD -0x8(%RDX),%XMM13,%XMM10 |
(91) 0x40e9c0 VMOVSD %XMM10,-0x8(%RDX) |
(91) 0x40e9c5 CMP %RSI,%R11 |
(91) 0x40e9c8 JE 40eadd |
(92) 0x40e9ce MOVSXD (%RSI),%R8 |
(92) 0x40e9d1 VMULSD (%RCX),%XMM0,%XMM15 |
(92) 0x40e9d5 ADD $0x10,%RSI |
(92) 0x40e9d9 ADD $0x60,%RDX |
(92) 0x40e9dd MOVSXD -0xc(%RSI),%R14 |
(92) 0x40e9e1 ADD $0x60,%RCX |
(92) 0x40e9e5 SAL $0x4,%R8 |
(92) 0x40e9e9 VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM14 |
(92) 0x40e9f0 SAL $0x4,%R14 |
(92) 0x40e9f4 MOVSXD -0x8(%RSI),%R8 |
(92) 0x40e9f8 VFMADD213SD -0x60(%RDX),%XMM14,%XMM15 |
(92) 0x40e9fe SAL $0x4,%R8 |
(92) 0x40ea02 VMOVSD %XMM15,-0x60(%RDX) |
(92) 0x40ea07 VMULSD -0x58(%RCX),%XMM0,%XMM1 |
(92) 0x40ea0c VFMADD213SD -0x58(%RDX),%XMM14,%XMM1 |
(92) 0x40ea12 VMOVSD %XMM1,-0x58(%RDX) |
(92) 0x40ea17 VMULSD -0x50(%RCX),%XMM0,%XMM3 |
(92) 0x40ea1c VFMADD213SD -0x50(%RDX),%XMM3,%XMM14 |
(92) 0x40ea22 VMOVSD %XMM14,-0x50(%RDX) |
(92) 0x40ea27 VMULSD -0x48(%RCX),%XMM0,%XMM5 |
(92) 0x40ea2c VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM4 |
(92) 0x40ea33 VFMADD213SD -0x48(%RDX),%XMM4,%XMM5 |
(92) 0x40ea39 VMOVSD %XMM5,-0x48(%RDX) |
(92) 0x40ea3e VMULSD -0x40(%RCX),%XMM0,%XMM6 |
(92) 0x40ea43 VFMADD213SD -0x40(%RDX),%XMM4,%XMM6 |
(92) 0x40ea49 VMOVSD %XMM6,-0x40(%RDX) |
(92) 0x40ea4e VMULSD -0x38(%RCX),%XMM0,%XMM7 |
(92) 0x40ea53 VFMADD213SD -0x38(%RDX),%XMM7,%XMM4 |
(92) 0x40ea59 VMOVSD %XMM4,-0x38(%RDX) |
(92) 0x40ea5e VMULSD -0x30(%RCX),%XMM0,%XMM9 |
(92) 0x40ea63 VDIVSD 0x8(%RDI,%R8,1),%XMM2,%XMM8 |
(92) 0x40ea6a VFMADD213SD -0x30(%RDX),%XMM8,%XMM9 |
(92) 0x40ea70 VMOVSD %XMM9,-0x30(%RDX) |
(92) 0x40ea75 VMULSD -0x28(%RCX),%XMM0,%XMM10 |
(92) 0x40ea7a VFMADD213SD -0x28(%RDX),%XMM8,%XMM10 |
(92) 0x40ea80 VMOVSD %XMM10,-0x28(%RDX) |
(92) 0x40ea85 VMULSD -0x20(%RCX),%XMM0,%XMM11 |
(92) 0x40ea8a VFMADD213SD -0x20(%RDX),%XMM11,%XMM8 |
(92) 0x40ea90 VMOVSD %XMM8,-0x20(%RDX) |
(92) 0x40ea95 MOVSXD -0x4(%RSI),%R14 |
(92) 0x40ea99 VMULSD -0x18(%RCX),%XMM0,%XMM13 |
(92) 0x40ea9e SAL $0x4,%R14 |
(92) 0x40eaa2 VDIVSD 0x8(%RDI,%R14,1),%XMM2,%XMM12 |
(92) 0x40eaa9 VFMADD213SD -0x18(%RDX),%XMM12,%XMM13 |
(92) 0x40eaaf VMOVSD %XMM13,-0x18(%RDX) |
(92) 0x40eab4 VMULSD -0x10(%RCX),%XMM0,%XMM14 |
(92) 0x40eab9 VFMADD213SD -0x10(%RDX),%XMM12,%XMM14 |
(92) 0x40eabf VMOVSD %XMM14,-0x10(%RDX) |
(92) 0x40eac4 VMULSD -0x8(%RCX),%XMM0,%XMM15 |
(92) 0x40eac9 VFMADD213SD -0x8(%RDX),%XMM15,%XMM12 |
(92) 0x40eacf VMOVSD %XMM12,-0x8(%RDX) |
(92) 0x40ead4 CMP %RSI,%R11 |
(92) 0x40ead7 JNE 40e9ce |
(91) 0x40eadd INC %R10 |
(91) 0x40eae0 ADD $0x40,%R9D |
(91) 0x40eae4 ADD $0x600,%RAX |
(91) 0x40eaea CMP %R10D,%EBX |
(91) 0x40eaed JG 40e880 |
0x40eaf3 POP %RBX |
0x40eaf4 POP %R12 |
0x40eaf6 POP %R13 |
0x40eaf8 POP %R14 |
0x40eafa POP %RBP |
0x40eafb RET |
0x40eafc INC %EAX |
0x40eafe XOR %EDX,%EDX |
0x40eb00 JMP 40e83e |
0x40eb05 NOPW %CS:(%RAX,%RAX,1) |
Coverage (%) | Name | Source Location | Module |
---|---|---|---|
○97.73 | gomp_thread_start | team.c:130 | libgomp.so.1.0.0 |
○2.25 | GOMP_parallel | libgomp.h:985 | libgomp.so.1.0.0 |
Path / |
Source file and lines | timestep.c:85-94 |
Module | exec |
nb instructions | 41 |
nb uops | 46 |
loop length | 141 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 7.67 cycles |
front end | 7.67 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 4.00 | 4.00 | 4.00 | 3.50 | 3.07 | 3.00 | 3.50 | 3.50 | 3.50 | 2.93 | 4.00 |
cycles | 3.00 | 5.33 | 4.00 | 4.00 | 3.50 | 3.07 | 3.00 | 3.50 | 3.50 | 3.50 | 2.93 | 4.00 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 7.33-7.36 |
Stall cycles | 0.00 |
Front-end | 7.67 |
Dispatch | 5.33 |
DIV/SQRT | 6.00 |
Overall L1 | 7.67 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %EBX | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 40eafc <advancePosition._omp_fn.0+0x2ec> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R9,1),%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %EBX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40eaf3 <advancePosition._omp_fn.0+0x2e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x8(%R12),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R9D,%R10 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
SAL $0x6,%R9D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R10,%R10,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x1ec8(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R12),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x9,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x78(%RCX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40e83e <advancePosition._omp_fn.0+0x2e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Source file and lines | timestep.c:85-94 |
Module | exec |
nb instructions | 41 |
nb uops | 46 |
loop length | 141 |
used x86 registers | 12 |
used mmx registers | 0 |
used xmm registers | 2 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 7.67 cycles |
front end | 7.67 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 3.00 | 4.00 | 4.00 | 4.00 | 3.50 | 3.07 | 3.00 | 3.50 | 3.50 | 3.50 | 2.93 | 4.00 |
cycles | 3.00 | 5.33 | 4.00 | 4.00 | 3.50 | 3.07 | 3.00 | 3.50 | 3.50 | 3.50 | 2.93 | 4.00 |
Cycles executing div or sqrt instructions | 6.00 |
FE+BE cycles | 7.33-7.36 |
Stall cycles | 0.00 |
Front-end | 7.67 |
Dispatch | 5.33 |
DIV/SQRT | 6.00 |
Overall L1 | 7.67 |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 0% |
other | 0% |
all | 7% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 7% |
all | 12% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 8% |
load | 12% |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | NA (no fma vectorizable/vectorized instructions) |
div/sqrt | 6% |
other | 7% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PUSH %RBP | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %R14 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R13 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
PUSH %R12 | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
MOV %RDI,%R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
PUSH %RBX | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 5-12 | 0.50 |
CALL 403070 <omp_get_num_threads@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%EBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
CALL 403160 <omp_get_thread_num@plt> | 2 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0 | 1 |
MOV %EAX,%R9D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
MOV 0x10(%R12),%EAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
CLTD | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
IDIV %EBX | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11-16 | 6 |
CMP %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JL 40eafc <advancePosition._omp_fn.0+0x2ec> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
IMUL %EAX,%R9D | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %EDX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RAX,%R9,1),%EBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
CMP %EBX,%R9D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JGE 40eaf3 <advancePosition._omp_fn.0+0x2e3> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD 0x8(%R12),%XMM0 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV (%R12),%R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOVSXD %R9D,%R10 | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
SAL $0x6,%R9D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
LEA (%R10,%R10,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
VMOVSD 0x1ec8(%RIP),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x18(%R12),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SAL $0x9,%RAX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV 0x78(%RCX),%R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
POP %RBX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %R14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
POP %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1-6 | 0.33 |
RET | 1 | 0.50 | 0 | 0.33 | 0.33 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0.33 | 0 | 2.13 |
INC %EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
JMP 40e83e <advancePosition._omp_fn.0+0x2e> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
Name | Coverage (%) | Time (s) |
---|---|---|
▼advancePosition._omp_fn.0– | 4.3 | 0.67 |
▼Loop 91 - timestep.c:88-94 - exec– | 1.46 | 0.16 |
○Loop 92 - timestep.c:88-94 - exec | 2.83 | 0.3 |