Loop Id: 17 | Module: exec | Source: timestep.c:74-78 | Coverage: 2.49% |
---|
Loop Id: 17 | Module: exec | Source: timestep.c:74-78 | Coverage: 2.49% |
---|
0x405ab8 MOVSXD (%R9,%R12,4),%RDI |
0x405abc TEST %EDI,%EDI |
0x405abe JLE 405e92 |
0x405ac4 MOV 0x20(%R10),%R11 |
0x405ac8 LEA (%RDI,%RDI,2),%RBX |
0x405acc LEA -0x18(,%RBX,8),%RDI |
0x405ad4 MOV 0x20(%R11),%RSI |
0x405ad8 MOV 0x28(%R11),%RCX |
0x405adc SHR $0x3,%RDI |
0x405ae0 MOV $0xaaaaaaaaaaaaaab,%R11 |
0x405aea IMUL %R11,%RDI |
0x405aee ADD %R8,%RSI |
0x405af1 ADD %R8,%RCX |
0x405af4 LEA (%RSI,%RBX,8),%RDX |
0x405af8 INC %RDI |
0x405afb AND $0x7,%EDI |
0x405afe JE 405cbb |
0x405b04 CMP $0x1,%RDI |
0x405b08 JE 405c7c |
0x405b0e CMP $0x2,%RDI |
0x405b12 JE 405c46 |
0x405b18 CMP $0x3,%RDI |
0x405b1c JE 405c10 |
0x405b22 CMP $0x4,%RDI |
0x405b26 JE 405bda |
0x405b2c CMP $0x5,%RDI |
0x405b30 JE 405ba4 |
0x405b32 CMP $0x6,%RDI |
0x405b36 JE 405b6e |
0x405b38 VMOVSD (%RCX),%XMM1 |
0x405b3c VFMADD213SD (%RSI),%XMM0,%XMM1 |
0x405b41 ADD $0x18,%RCX |
0x405b45 ADD $0x18,%RSI |
0x405b49 VMOVSD %XMM1,-0x18(%RSI) |
0x405b4e VMOVSD -0x10(%RCX),%XMM2 |
0x405b53 VFMADD213SD -0x10(%RSI),%XMM0,%XMM2 |
0x405b59 VMOVSD %XMM2,-0x10(%RSI) |
0x405b5e VMOVSD -0x8(%RCX),%XMM3 |
0x405b63 VFMADD213SD -0x8(%RSI),%XMM0,%XMM3 |
0x405b69 VMOVSD %XMM3,-0x8(%RSI) |
0x405b6e VMOVSD (%RCX),%XMM4 |
0x405b72 VFMADD213SD (%RSI),%XMM0,%XMM4 |
0x405b77 ADD $0x18,%RCX |
0x405b7b ADD $0x18,%RSI |
0x405b7f VMOVSD %XMM4,-0x18(%RSI) |
0x405b84 VMOVSD -0x10(%RCX),%XMM5 |
0x405b89 VFMADD213SD -0x10(%RSI),%XMM0,%XMM5 |
0x405b8f VMOVSD %XMM5,-0x10(%RSI) |
0x405b94 VMOVSD -0x8(%RCX),%XMM6 |
0x405b99 VFMADD213SD -0x8(%RSI),%XMM0,%XMM6 |
0x405b9f VMOVSD %XMM6,-0x8(%RSI) |
0x405ba4 VMOVSD (%RCX),%XMM7 |
0x405ba8 VFMADD213SD (%RSI),%XMM0,%XMM7 |
0x405bad ADD $0x18,%RCX |
0x405bb1 ADD $0x18,%RSI |
0x405bb5 VMOVSD %XMM7,-0x18(%RSI) |
0x405bba VMOVSD -0x10(%RCX),%XMM8 |
0x405bbf VFMADD213SD -0x10(%RSI),%XMM0,%XMM8 |
0x405bc5 VMOVSD %XMM8,-0x10(%RSI) |
0x405bca VMOVSD -0x8(%RCX),%XMM9 |
0x405bcf VFMADD213SD -0x8(%RSI),%XMM0,%XMM9 |
0x405bd5 VMOVSD %XMM9,-0x8(%RSI) |
0x405bda VMOVSD (%RCX),%XMM10 |
0x405bde VFMADD213SD (%RSI),%XMM0,%XMM10 |
0x405be3 ADD $0x18,%RCX |
0x405be7 ADD $0x18,%RSI |
0x405beb VMOVSD %XMM10,-0x18(%RSI) |
0x405bf0 VMOVSD -0x10(%RCX),%XMM11 |
0x405bf5 VFMADD213SD -0x10(%RSI),%XMM0,%XMM11 |
0x405bfb VMOVSD %XMM11,-0x10(%RSI) |
0x405c00 VMOVSD -0x8(%RCX),%XMM12 |
0x405c05 VFMADD213SD -0x8(%RSI),%XMM0,%XMM12 |
0x405c0b VMOVSD %XMM12,-0x8(%RSI) |
0x405c10 VMOVSD (%RCX),%XMM13 |
0x405c14 VFMADD213SD (%RSI),%XMM0,%XMM13 |
0x405c19 ADD $0x18,%RCX |
0x405c1d ADD $0x18,%RSI |
0x405c21 VMOVSD %XMM13,-0x18(%RSI) |
0x405c26 VMOVSD -0x10(%RCX),%XMM14 |
0x405c2b VFMADD213SD -0x10(%RSI),%XMM0,%XMM14 |
0x405c31 VMOVSD %XMM14,-0x10(%RSI) |
0x405c36 VMOVSD -0x8(%RCX),%XMM15 |
0x405c3b VFMADD213SD -0x8(%RSI),%XMM0,%XMM15 |
0x405c41 VMOVSD %XMM15,-0x8(%RSI) |
0x405c46 VMOVSD (%RCX),%XMM1 |
0x405c4a VFMADD213SD (%RSI),%XMM0,%XMM1 |
0x405c4f ADD $0x18,%RCX |
0x405c53 ADD $0x18,%RSI |
0x405c57 VMOVSD %XMM1,-0x18(%RSI) |
0x405c5c VMOVSD -0x10(%RCX),%XMM2 |
0x405c61 VFMADD213SD -0x10(%RSI),%XMM0,%XMM2 |
0x405c67 VMOVSD %XMM2,-0x10(%RSI) |
0x405c6c VMOVSD -0x8(%RCX),%XMM3 |
0x405c71 VFMADD213SD -0x8(%RSI),%XMM0,%XMM3 |
0x405c77 VMOVSD %XMM3,-0x8(%RSI) |
0x405c7c VMOVSD (%RCX),%XMM4 |
0x405c80 VFMADD213SD (%RSI),%XMM0,%XMM4 |
0x405c85 ADD $0x18,%RSI |
0x405c89 ADD $0x18,%RCX |
0x405c8d VMOVSD %XMM4,-0x18(%RSI) |
0x405c92 VMOVSD -0x10(%RCX),%XMM5 |
0x405c97 VFMADD213SD -0x10(%RSI),%XMM0,%XMM5 |
0x405c9d VMOVSD %XMM5,-0x10(%RSI) |
0x405ca2 VMOVSD -0x8(%RCX),%XMM6 |
0x405ca7 VFMADD213SD -0x8(%RSI),%XMM0,%XMM6 |
0x405cad VMOVSD %XMM6,-0x8(%RSI) |
0x405cb2 CMP %RDX,%RSI |
0x405cb5 JE 405e92 |
(18) 0x405cbb VMOVSD (%RCX),%XMM7 |
(18) 0x405cbf VFMADD213SD (%RSI),%XMM0,%XMM7 |
(18) 0x405cc4 ADD $0xc0,%RSI |
(18) 0x405ccb ADD $0xc0,%RCX |
(18) 0x405cd2 VMOVSD %XMM7,-0xc0(%RSI) |
(18) 0x405cda VMOVSD -0xb8(%RCX),%XMM8 |
(18) 0x405ce2 VFMADD213SD -0xb8(%RSI),%XMM0,%XMM8 |
(18) 0x405ceb VMOVSD %XMM8,-0xb8(%RSI) |
(18) 0x405cf3 VMOVSD -0xb0(%RCX),%XMM9 |
(18) 0x405cfb VFMADD213SD -0xb0(%RSI),%XMM0,%XMM9 |
(18) 0x405d04 VMOVSD %XMM9,-0xb0(%RSI) |
(18) 0x405d0c VMOVSD -0xa8(%RCX),%XMM10 |
(18) 0x405d14 VFMADD213SD -0xa8(%RSI),%XMM0,%XMM10 |
(18) 0x405d1d VMOVSD %XMM10,-0xa8(%RSI) |
(18) 0x405d25 VMOVSD -0xa0(%RCX),%XMM11 |
(18) 0x405d2d VFMADD213SD -0xa0(%RSI),%XMM0,%XMM11 |
(18) 0x405d36 VMOVSD %XMM11,-0xa0(%RSI) |
(18) 0x405d3e VMOVSD -0x98(%RCX),%XMM12 |
(18) 0x405d46 VFMADD213SD -0x98(%RSI),%XMM0,%XMM12 |
(18) 0x405d4f VMOVSD %XMM12,-0x98(%RSI) |
(18) 0x405d57 VMOVSD -0x90(%RCX),%XMM13 |
(18) 0x405d5f VFMADD213SD -0x90(%RSI),%XMM0,%XMM13 |
(18) 0x405d68 VMOVSD %XMM13,-0x90(%RSI) |
(18) 0x405d70 VMOVSD -0x88(%RCX),%XMM14 |
(18) 0x405d78 VFMADD213SD -0x88(%RSI),%XMM0,%XMM14 |
(18) 0x405d81 VMOVSD %XMM14,-0x88(%RSI) |
(18) 0x405d89 VMOVSD -0x80(%RCX),%XMM15 |
(18) 0x405d8e VFMADD213SD -0x80(%RSI),%XMM0,%XMM15 |
(18) 0x405d94 VMOVSD %XMM15,-0x80(%RSI) |
(18) 0x405d99 VMOVSD -0x78(%RCX),%XMM1 |
(18) 0x405d9e VFMADD213SD -0x78(%RSI),%XMM0,%XMM1 |
(18) 0x405da4 VMOVSD %XMM1,-0x78(%RSI) |
(18) 0x405da9 VMOVSD -0x70(%RCX),%XMM2 |
(18) 0x405dae VFMADD213SD -0x70(%RSI),%XMM0,%XMM2 |
(18) 0x405db4 VMOVSD %XMM2,-0x70(%RSI) |
(18) 0x405db9 VMOVSD -0x68(%RCX),%XMM3 |
(18) 0x405dbe VFMADD213SD -0x68(%RSI),%XMM0,%XMM3 |
(18) 0x405dc4 VMOVSD %XMM3,-0x68(%RSI) |
(18) 0x405dc9 VMOVSD -0x60(%RCX),%XMM4 |
(18) 0x405dce VFMADD213SD -0x60(%RSI),%XMM0,%XMM4 |
(18) 0x405dd4 VMOVSD %XMM4,-0x60(%RSI) |
(18) 0x405dd9 VMOVSD -0x58(%RCX),%XMM5 |
(18) 0x405dde VFMADD213SD -0x58(%RSI),%XMM0,%XMM5 |
(18) 0x405de4 VMOVSD %XMM5,-0x58(%RSI) |
(18) 0x405de9 VMOVSD -0x50(%RCX),%XMM6 |
(18) 0x405dee VFMADD213SD -0x50(%RSI),%XMM0,%XMM6 |
(18) 0x405df4 VMOVSD %XMM6,-0x50(%RSI) |
(18) 0x405df9 VMOVSD -0x48(%RCX),%XMM7 |
(18) 0x405dfe VFMADD213SD -0x48(%RSI),%XMM0,%XMM7 |
(18) 0x405e04 VMOVSD %XMM7,-0x48(%RSI) |
(18) 0x405e09 VMOVSD -0x40(%RCX),%XMM8 |
(18) 0x405e0e VFMADD213SD -0x40(%RSI),%XMM0,%XMM8 |
(18) 0x405e14 VMOVSD %XMM8,-0x40(%RSI) |
(18) 0x405e19 VMOVSD -0x38(%RCX),%XMM9 |
(18) 0x405e1e VFMADD213SD -0x38(%RSI),%XMM0,%XMM9 |
(18) 0x405e24 VMOVSD %XMM9,-0x38(%RSI) |
(18) 0x405e29 VMOVSD -0x30(%RCX),%XMM10 |
(18) 0x405e2e VFMADD213SD -0x30(%RSI),%XMM0,%XMM10 |
(18) 0x405e34 VMOVSD %XMM10,-0x30(%RSI) |
(18) 0x405e39 VMOVSD -0x28(%RCX),%XMM11 |
(18) 0x405e3e VFMADD213SD -0x28(%RSI),%XMM0,%XMM11 |
(18) 0x405e44 VMOVSD %XMM11,-0x28(%RSI) |
(18) 0x405e49 VMOVSD -0x20(%RCX),%XMM12 |
(18) 0x405e4e VFMADD213SD -0x20(%RSI),%XMM0,%XMM12 |
(18) 0x405e54 VMOVSD %XMM12,-0x20(%RSI) |
(18) 0x405e59 VMOVSD -0x18(%RCX),%XMM13 |
(18) 0x405e5e VFMADD213SD -0x18(%RSI),%XMM0,%XMM13 |
(18) 0x405e64 VMOVSD %XMM13,-0x18(%RSI) |
(18) 0x405e69 VMOVSD -0x10(%RCX),%XMM14 |
(18) 0x405e6e VFMADD213SD -0x10(%RSI),%XMM0,%XMM14 |
(18) 0x405e74 VMOVSD %XMM14,-0x10(%RSI) |
(18) 0x405e79 VMOVSD -0x8(%RCX),%XMM15 |
(18) 0x405e7e VFMADD213SD -0x8(%RSI),%XMM0,%XMM15 |
(18) 0x405e84 VMOVSD %XMM15,-0x8(%RSI) |
(18) 0x405e89 CMP %RDX,%RSI |
(18) 0x405e8c JNE 405cbb |
0x405e92 INC %R12 |
0x405e95 ADD $0x600,%R8 |
0x405e9c CMP %R12D,%EAX |
0x405e9f JG 405ab8 |
/home/eoseret/qaas_runs_CPU_9468/171-110-4860/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 74 - 78 |
-------------------------------------------------------------------------------- |
74: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
75: { |
76: s->atoms->p[iOff][0] += dt*s->atoms->f[iOff][0]; |
77: s->atoms->p[iOff][1] += dt*s->atoms->f[iOff][1]; |
78: s->atoms->p[iOff][2] += dt*s->atoms->f[iOff][2]; |
Path / |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.29 |
CQA speedup if FP arith vectorized | 1.31 |
CQA speedup if fully vectorized | 9.79 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.21 |
Bottlenecks | micro-operation queue, |
Function | advanceVelocity._omp_fn.0 |
Source | timestep.c:74-78 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 18.50 |
CQA cycles if no scalar integer | 14.33 |
CQA cycles if FP arith vectorized | 14.17 |
CQA cycles if fully vectorized | 1.89 |
Front-end cycles | 18.50 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 13.50 |
P1 cycles | 13.00 |
P2 cycles | 15.33 |
P3 cycles | 15.33 |
P4 cycles | 10.50 |
P5 cycles | 6.60 |
P6 cycles | 6.50 |
P7 cycles | 10.50 |
P8 cycles | 10.50 |
P9 cycles | 10.50 |
P10 cycles | 6.40 |
P11 cycles | 15.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 25.17 |
Stall cycles (UFS) | 6.52 |
Nb insns | 112.00 |
Nb uops | 111.00 |
Nb loads | 46.00 |
Nb stores | 21.00 |
Nb stack references | 0.00 |
FLOP/cycle | 2.27 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 21.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 28.76 |
Bytes prefetched | 0.00 |
Bytes loaded | 364.00 |
Bytes stored | 168.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.41 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 11.72 |
Metric | Value |
---|---|
CQA speedup if no scalar integer | 1.29 |
CQA speedup if FP arith vectorized | 1.31 |
CQA speedup if fully vectorized | 9.79 |
CQA speedup if no inter-iteration dependency | NA |
CQA speedup if next bottleneck killed | 1.21 |
Bottlenecks | micro-operation queue, |
Function | advanceVelocity._omp_fn.0 |
Source | timestep.c:74-78 |
Source loop unroll info | NA |
Source loop unroll confidence level | NA |
Unroll/vectorization loop type | NA |
Unroll factor | NA |
CQA cycles | 18.50 |
CQA cycles if no scalar integer | 14.33 |
CQA cycles if FP arith vectorized | 14.17 |
CQA cycles if fully vectorized | 1.89 |
Front-end cycles | 18.50 |
DIV/SQRT cycles | 0.00 |
P0 cycles | 13.50 |
P1 cycles | 13.00 |
P2 cycles | 15.33 |
P3 cycles | 15.33 |
P4 cycles | 10.50 |
P5 cycles | 6.60 |
P6 cycles | 6.50 |
P7 cycles | 10.50 |
P8 cycles | 10.50 |
P9 cycles | 10.50 |
P10 cycles | 6.40 |
P11 cycles | 15.33 |
Inter-iter dependencies cycles | NA |
FE+BE cycles (UFS) | 25.17 |
Stall cycles (UFS) | 6.52 |
Nb insns | 112.00 |
Nb uops | 111.00 |
Nb loads | 46.00 |
Nb stores | 21.00 |
Nb stack references | 0.00 |
FLOP/cycle | 2.27 |
Nb FLOP add-sub | 0.00 |
Nb FLOP mul | 0.00 |
Nb FLOP fma | 21.00 |
Nb FLOP div | 0.00 |
Nb FLOP rcp | 0.00 |
Nb FLOP sqrt | 0.00 |
Nb FLOP rsqrt | 0.00 |
Bytes/cycle | 28.76 |
Bytes prefetched | 0.00 |
Bytes loaded | 364.00 |
Bytes stored | 168.00 |
Stride 0 | NA |
Stride 1 | NA |
Stride n | NA |
Stride unknown | NA |
Stride indirect | NA |
Vectorization ratio all | 0.00 |
Vectorization ratio load | 0.00 |
Vectorization ratio store | 0.00 |
Vectorization ratio mul | NA |
Vectorization ratio add_sub | 0.00 |
Vectorization ratio fma | 0.00 |
Vectorization ratio div_sqrt | NA |
Vectorization ratio other | 0.00 |
Vector-efficiency ratio all | 12.41 |
Vector-efficiency ratio load | 12.50 |
Vector-efficiency ratio store | 12.50 |
Vector-efficiency ratio mul | NA |
Vector-efficiency ratio add_sub | 12.50 |
Vector-efficiency ratio fma | 12.50 |
Vector-efficiency ratio div_sqrt | NA |
Vector-efficiency ratio other | 11.72 |
Path / |
Function | advanceVelocity._omp_fn.0 |
Source file and lines | timestep.c:74-78 |
Module | exec |
nb instructions | 112 |
nb uops | 111 |
loop length | 534 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 13.50 | 13.00 | 15.33 | 15.33 | 10.50 | 6.60 | 6.50 | 10.50 | 10.50 | 10.50 | 6.40 | 15.33 |
cycles | 13.50 | 13.00 | 15.33 | 15.33 | 10.50 | 6.60 | 6.50 | 10.50 | 10.50 | 10.50 | 6.40 | 15.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 25.17 |
Stall cycles | 6.52 |
LM full (events) | 8.03 |
Front-end | 18.50 |
Dispatch | 15.33 |
Overall L1 | 18.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOVSXD (%R9,%R12,4),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EDI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 405e92 <advanceVelocity._omp_fn.0+0x432> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%R10),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDI,%RDI,2),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x18(,%RBX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x20(%R11),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%R11),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $0xaaaaaaaaaaaaaab,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
IMUL %R11,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R8,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RSI,%RBX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 405cbb <advanceVelocity._omp_fn.0+0x25b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405c7c <advanceVelocity._omp_fn.0+0x21c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405c46 <advanceVelocity._omp_fn.0+0x1e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405c10 <advanceVelocity._omp_fn.0+0x1b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405bda <advanceVelocity._omp_fn.0+0x17a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x5,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405ba4 <advanceVelocity._omp_fn.0+0x144> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x6,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405b6e <advanceVelocity._omp_fn.0+0x10e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD (%RCX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM1,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM2,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM3,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM4,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM5,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM6,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM7 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM7,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM8,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM9,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM10,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM11 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM11,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM12 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM12,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM13 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM13,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM14 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM14,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM15 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM15,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM1,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM2,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM3,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM4,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM5,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM6,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405e92 <advanceVelocity._omp_fn.0+0x432> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
INC %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x600,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R12D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JG 405ab8 <advanceVelocity._omp_fn.0+0x58> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
Function | advanceVelocity._omp_fn.0 |
Source file and lines | timestep.c:74-78 |
Module | exec |
nb instructions | 112 |
nb uops | 111 |
loop length | 534 |
used x86 registers | 11 |
used mmx registers | 0 |
used xmm registers | 16 |
used ymm registers | 0 |
used zmm registers | 0 |
nb stack references | 0 |
micro-operation queue | 18.50 cycles |
front end | 18.50 cycles |
Metric | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
uops | 13.50 | 13.00 | 15.33 | 15.33 | 10.50 | 6.60 | 6.50 | 10.50 | 10.50 | 10.50 | 6.40 | 15.33 |
cycles | 13.50 | 13.00 | 15.33 | 15.33 | 10.50 | 6.60 | 6.50 | 10.50 | 10.50 | 10.50 | 6.40 | 15.33 |
Cycles executing div or sqrt instructions | NA |
FE+BE cycles | 25.17 |
Stall cycles | 6.52 |
LM full (events) | 8.03 |
Front-end | 18.50 |
Dispatch | 15.33 |
Overall L1 | 18.50 |
all | 0% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 0% |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 0% |
load | 0% |
store | 0% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 0% |
fma | 0% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 0% |
all | 11% |
load | NA (no load vectorizable/vectorized instructions) |
store | NA (no store vectorizable/vectorized instructions) |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | NA (no fma vectorizable/vectorized instructions) |
other | 11% |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | NA (no add-sub vectorizable/vectorized instructions) |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | NA (no other vectorizable/vectorized instructions) |
all | 12% |
load | 12% |
store | 12% |
mul | NA (no mul vectorizable/vectorized instructions) |
add-sub | 12% |
fma | 12% |
div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
other | 11% |
Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
MOVSXD (%R9,%R12,4),%RDI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
TEST %EDI,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
JLE 405e92 <advanceVelocity._omp_fn.0+0x432> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
MOV 0x20(%R10),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
LEA (%RDI,%RDI,2),%RBX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
LEA -0x18(,%RBX,8),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
MOV 0x20(%R11),%RSI | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
MOV 0x28(%R11),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
SHR $0x3,%RDI | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
MOV $0xaaaaaaaaaaaaaab,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.28 |
IMUL %R11,%RDI | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
ADD %R8,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
ADD %R8,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
LEA (%RSI,%RBX,8),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
INC %RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
AND $0x7,%EDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
JE 405cbb <advanceVelocity._omp_fn.0+0x25b> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x1,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405c7c <advanceVelocity._omp_fn.0+0x21c> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x2,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405c46 <advanceVelocity._omp_fn.0+0x1e6> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x3,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405c10 <advanceVelocity._omp_fn.0+0x1b0> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x4,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405bda <advanceVelocity._omp_fn.0+0x17a> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x5,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405ba4 <advanceVelocity._omp_fn.0+0x144> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
CMP $0x6,%RDI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405b6e <advanceVelocity._omp_fn.0+0x10e> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
VMOVSD (%RCX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM1,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM2,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM3,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM4,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM5,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM6,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX),%XMM7 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM7 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM7,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM8 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM8,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM9 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM9,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX),%XMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM10 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM10,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM11 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM11,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM12 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM12 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM12,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX),%XMM13 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM13 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM13,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM14 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM14,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM15 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM15 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM15,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX),%XMM1 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM1 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM1,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM2 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM2,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM3 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM3 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM3,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD (%RCX),%XMM4 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD (%RSI),%XMM0,%XMM4 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
ADD $0x18,%RSI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x18,%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
VMOVSD %XMM4,-0x18(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x10(%RCX),%XMM5 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x10(%RSI),%XMM0,%XMM5 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM5,-0x10(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
VMOVSD -0x8(%RCX),%XMM6 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
VFMADD213SD -0x8(%RSI),%XMM0,%XMM6 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
VMOVSD %XMM6,-0x8(%RSI) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 1 | 0.50 |
CMP %RDX,%RSI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JE 405e92 <advanceVelocity._omp_fn.0+0x432> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
INC %R12 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
ADD $0x600,%R8 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
CMP %R12D,%EAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
JG 405ab8 <advanceVelocity._omp_fn.0+0x58> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |