| Loop Id: 93 | Module: exec | Source: timestep.c:85-94 | Coverage: 0.09% |
|---|
| Loop Id: 93 | Module: exec | Source: timestep.c:85-94 | Coverage: 0.09% |
|---|
0x411640 ADD $0x40,%ESI |
0x411643 CMP -0x40(%RBP),%RDI |
0x411647 LEA 0x1(%RDI),%RDI |
0x41164b JE 411590 |
0x411651 LEA (%RDI,%RDX,1),%RCX |
0x411655 MOV -0x60(%RBP),%RAX |
0x411659 MOV (%RAX,%RCX,4),%R15D |
0x41165d TEST %R15D,%R15D |
0x411660 JLE 411640 |
0x411662 MOV %ESI,%ECX |
0x411664 LEA (,%RCX,4),%R13 |
0x41166c LEA (%RDI,%RDX,1),%R12D |
0x411670 SAL $0x6,%R12D |
0x411674 MOV -0x58(%RBP),%RAX |
0x411678 MOV 0x20(%RAX),%RDX |
0x41167c MOV 0x28(%RAX),%R10 |
0x411680 MOV 0x10(%RDX),%R11 |
0x411684 MOV 0x18(%RDX),%R8 |
0x411688 MOV 0x20(%RDX),%R9 |
0x41168c LEA -0x1(%R15),%EDX |
0x411690 MOVSXD %EDX,%RDX |
0x411693 ADD %R12,%RDX |
0x411696 SAL $0x3,%RDX |
0x41169a LEA (%RDX,%RDX,2),%RDX |
0x41169e LEA 0x10(%R9,%RDX,1),%RBX |
0x4116a3 LEA (,%R12,8),%R14 |
0x4116ab LEA (%R14,%R14,2),%R14 |
0x4116af LEA (%R8,%R14,1),%RAX |
0x4116b3 CMP %RAX,%RBX |
0x4116b6 JB 411740 |
0x4116bc ADD %R9,%R14 |
0x4116bf LEA 0x10(%R8,%RDX,1),%RAX |
0x4116c4 CMP %R14,%RAX |
0x4116c7 JB 411740 |
0x4116c9 LEA (%RCX,%RCX,2),%RAX |
0x4116cd LEA 0x10(,%RAX,8),%RCX |
0x4116d5 ADD %R13,%R11 |
0x4116d8 XOR %EDX,%EDX |
0x4116da NOPW (%RAX,%RAX,1) |
(95) 0x4116e0 MOVSXD (%R11,%RDX,4),%RAX |
(95) 0x4116e4 SAL $0x4,%RAX |
(95) 0x4116e8 VDIVSD 0x8(%R10,%RAX,1),%XMM0,%XMM13 |
(95) 0x4116ef VMOVSD -0x10(%R9,%RCX,1),%XMM14 |
(95) 0x4116f6 VFMADD213SD -0x10(%R8,%RCX,1),%XMM13,%XMM14 |
(95) 0x4116fd VMOVSD %XMM14,-0x10(%R8,%RCX,1) |
(95) 0x411704 VMOVSD -0x8(%R9,%RCX,1),%XMM14 |
(95) 0x41170b VFMADD213SD -0x8(%R8,%RCX,1),%XMM13,%XMM14 |
(95) 0x411712 VMOVSD %XMM14,-0x8(%R8,%RCX,1) |
(95) 0x411719 VMOVSD (%R9,%RCX,1),%XMM14 |
(95) 0x41171f VFMADD213SD (%R8,%RCX,1),%XMM13,%XMM14 |
(95) 0x411725 VMOVSD %XMM14,(%R8,%RCX,1) |
(95) 0x41172b ADD $0x18,%RCX |
(95) 0x41172f INC %RDX |
(95) 0x411732 CMP %EDX,%R15D |
(95) 0x411735 JNE 4116e0 |
0x411737 MOV -0x38(%RBP),%RDX |
0x41173b JMP 411640 |
0x411740 MOV %R15D,%R14D |
0x411743 VPBROADCASTQ %R12,%ZMM13 |
0x411749 VPBROADCASTD %R15D,%YMM14 |
0x41174f AND $-0x8,%R14D |
0x411753 JE 411880 |
0x411759 LEA -0x1(%R14),%EDX |
0x41175d VPADDQ %ZMM4,%ZMM13,%ZMM15 |
0x411763 ADD %R11,%R13 |
0x411766 XOR %ECX,%ECX |
0x411768 NOPL (%RAX,%RAX,1) |
(94) 0x411770 VPMOVSXDQ (%R13,%RCX,4),%ZMM16 |
(94) 0x411778 VXORPD %XMM17,%XMM17,%XMM17 |
(94) 0x41177e VPSLLQ $0x4,%ZMM16,%ZMM16 |
(94) 0x411785 KXNORW %K0,%K0,%K1 |
(94) 0x411789 VGATHERQPD 0x8(%R10,%ZMM16,1),%ZMM17{%K1} |
(94) 0x411791 VPBROADCASTQ %RCX,%ZMM16 |
(94) 0x411797 VPADDQ %ZMM16,%ZMM15,%ZMM16 |
(94) 0x41179d VPSLLQ $0x3,%ZMM16,%ZMM18 |
(94) 0x4117a4 VPSLLQ $0x4,%ZMM16,%ZMM16 |
(94) 0x4117ab VXORPD %XMM19,%XMM19,%XMM19 |
(94) 0x4117b1 KXNORW %K0,%K0,%K1 |
(94) 0x4117b5 VPADDQ %ZMM18,%ZMM16,%ZMM16 |
(94) 0x4117bb VPXORD %XMM18,%XMM18,%XMM18 |
(94) 0x4117c1 VGATHERQPD (%R9,%ZMM16,1),%ZMM19{%K1} |
(94) 0x4117c8 KXNORW %K0,%K0,%K1 |
(94) 0x4117cc VGATHERQPD (%R8,%ZMM16,1),%ZMM18{%K1} |
(94) 0x4117d3 VDIVPD %ZMM17,%ZMM1,%ZMM17 |
(94) 0x4117d9 VFMADD231PD %ZMM19,%ZMM17,%ZMM18 |
(94) 0x4117df KXNORW %K0,%K0,%K1 |
(94) 0x4117e3 VXORPD %XMM19,%XMM19,%XMM19 |
(94) 0x4117e9 KXNORW %K0,%K0,%K2 |
(94) 0x4117ed VSCATTERQPD %ZMM18,(%R8,%ZMM16,1){%K1} |
(94) 0x4117f4 VXORPD %XMM18,%XMM18,%XMM18 |
(94) 0x4117fa VGATHERQPD 0x8(%R9,%ZMM16,1),%ZMM19{%K2} |
(94) 0x411802 KXNORW %K0,%K0,%K1 |
(94) 0x411806 VGATHERQPD 0x8(%R8,%ZMM16,1),%ZMM18{%K1} |
(94) 0x41180e VFMADD231PD %ZMM19,%ZMM17,%ZMM18 |
(94) 0x411814 KXNORW %K0,%K0,%K1 |
(94) 0x411818 VXORPD %XMM19,%XMM19,%XMM19 |
(94) 0x41181e KXNORW %K0,%K0,%K2 |
(94) 0x411822 VSCATTERQPD %ZMM18,0x8(%R8,%ZMM16,1){%K1} |
(94) 0x41182a VXORPD %XMM18,%XMM18,%XMM18 |
(94) 0x411830 VGATHERQPD 0x10(%R9,%ZMM16,1),%ZMM19{%K2} |
(94) 0x411838 KXNORW %K0,%K0,%K1 |
(94) 0x41183c VGATHERQPD 0x10(%R8,%ZMM16,1),%ZMM18{%K1} |
(94) 0x411844 VFMADD231PD %ZMM19,%ZMM17,%ZMM18 |
(94) 0x41184a KXNORW %K0,%K0,%K1 |
(94) 0x41184e VSCATTERQPD %ZMM18,0x10(%R8,%ZMM16,1){%K1} |
(94) 0x411856 ADD $0x8,%RCX |
(94) 0x41185a CMP %EDX,%ECX |
(94) 0x41185c JLE 411770 |
0x411862 CMP %R14D,%R15D |
0x411865 MOV -0x38(%RBP),%RDX |
0x411869 JE 411640 |
0x41186f JMP 411887 |
0x411880 XOR %R14D,%R14D |
0x411883 MOV -0x38(%RBP),%RDX |
0x411887 VPBROADCASTD %R14D,%YMM15 |
0x41188d VPSUBD %YMM15,%YMM14,%YMM14 |
0x411892 VPCMPNLEUD %YMM2,%YMM14,%K0 |
0x411899 KORTESTB %K0,%K0 |
0x41189d JE 411640 |
0x4118a3 VPCMPNLEUD %YMM3,%YMM14,%K1 |
0x4118aa MOVSXD %R14D,%RAX |
0x4118ad ADD %RAX,%R12 |
0x4118b0 VMOVDQU32 (%R11,%R12,4),%YMM14{%K1}{z} |
0x4118b7 VMOVDQA32 %YMM14,%YMM12{%K1} |
0x4118bd VPMOVSXDQ %YMM12,%ZMM14 |
0x4118c3 VPSLLQ $0x4,%ZMM14,%ZMM14 |
0x4118ca VXORPD %XMM16,%XMM16,%XMM16 |
0x4118d0 KMOVQ %K1,%K2 |
0x4118d5 VGATHERQPD 0x8(%R10,%ZMM14,1),%ZMM16{%K2} |
0x4118dd VPADDD %YMM3,%YMM15,%YMM14 |
0x4118e1 VPMOVSXDQ %YMM14,%ZMM14 |
0x4118e7 VPADDQ %ZMM14,%ZMM13,%ZMM13 |
0x4118ed VMOVAPD %ZMM16,%ZMM11{%K1} |
0x4118f3 VDIVPD %ZMM11,%ZMM1,%ZMM14 |
0x4118f9 VPSLLQ $0x3,%ZMM13,%ZMM15 |
0x411900 VPSLLQ $0x4,%ZMM13,%ZMM13 |
0x411907 VPADDQ %ZMM15,%ZMM13,%ZMM13 |
0x41190d VPXOR %XMM15,%XMM15,%XMM15 |
0x411912 KMOVQ %K1,%K2 |
0x411917 VGATHERQPD (%R9,%ZMM13,1),%ZMM15{%K2} |
0x41191e VXORPD %XMM16,%XMM16,%XMM16 |
0x411924 KMOVQ %K1,%K2 |
0x411929 VGATHERQPD (%R8,%ZMM13,1),%ZMM16{%K2} |
0x411930 VMOVAPD %ZMM15,%ZMM10{%K1} |
0x411936 VMOVAPD %ZMM16,%ZMM9{%K1} |
0x41193c VMOVAPD %ZMM10,%ZMM15 |
0x411942 VFMADD213PD %ZMM9,%ZMM14,%ZMM15 |
0x411948 KMOVQ %K1,%K2 |
0x41194d VSCATTERQPD %ZMM15,(%R8,%ZMM13,1){%K2} |
0x411954 VXORPD %XMM15,%XMM15,%XMM15 |
0x411959 KMOVQ %K1,%K2 |
0x41195e VGATHERQPD 0x8(%R9,%ZMM13,1),%ZMM15{%K2} |
0x411966 VXORPD %XMM16,%XMM16,%XMM16 |
0x41196c KMOVQ %K1,%K2 |
0x411971 VGATHERQPD 0x8(%R8,%ZMM13,1),%ZMM16{%K2} |
0x411979 VMOVAPD %ZMM15,%ZMM8{%K1} |
0x41197f VMOVAPD %ZMM16,%ZMM7{%K1} |
0x411985 VMOVAPD %ZMM8,%ZMM15 |
0x41198b VFMADD213PD %ZMM7,%ZMM14,%ZMM15 |
0x411991 KMOVQ %K1,%K2 |
0x411996 VSCATTERQPD %ZMM15,0x8(%R8,%ZMM13,1){%K2} |
0x41199e VXORPD %XMM15,%XMM15,%XMM15 |
0x4119a3 KMOVQ %K1,%K2 |
0x4119a8 VGATHERQPD 0x10(%R9,%ZMM13,1),%ZMM15{%K2} |
0x4119b0 VXORPD %XMM16,%XMM16,%XMM16 |
0x4119b6 KMOVQ %K1,%K2 |
0x4119bb VGATHERQPD 0x10(%R8,%ZMM13,1),%ZMM16{%K2} |
0x4119c3 VMOVAPD %ZMM15,%ZMM6{%K1} |
0x4119c9 VMOVAPD %ZMM16,%ZMM5{%K1} |
0x4119cf VFMADD213PD %ZMM5,%ZMM6,%ZMM14 |
0x4119d5 VSCATTERQPD %ZMM14,0x10(%R8,%ZMM13,1){%K1} |
0x4119dd JMP 411640 |
/home/eoseret/qaas_runs_CPU_9468/171-148-3214/intel/CoMD/build/CoMD/CoMD/src-openmp/timestep.c: 85 - 94 |
-------------------------------------------------------------------------------- |
85: #pragma omp parallel for |
86: for (int iBox=0; iBox<nBoxes; iBox++) |
87: { |
88: for (int iOff=MAXATOMS*iBox,ii=0; ii<s->boxes->nAtoms[iBox]; ii++,iOff++) |
89: { |
90: int iSpecies = s->atoms->iSpecies[iOff]; |
91: real_t invMass = 1.0/s->species[iSpecies].mass; |
92: s->atoms->r[iOff][0] += dt*s->atoms->p[iOff][0]*invMass; |
93: s->atoms->r[iOff][1] += dt*s->atoms->p[iOff][1]*invMass; |
94: s->atoms->r[iOff][2] += dt*s->atoms->p[iOff][2]*invMass; |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.36 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 1.11 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.19 |
| Bottlenecks | micro-operation queue |
| Function | advancePosition.extracted |
| Source | timestep.c:85-94 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 33.33 |
| CQA cycles if no scalar integer | 24.50 |
| CQA cycles if FP arith vectorized | 33.33 |
| CQA cycles if fully vectorized | 29.98 |
| Front-end cycles | 33.33 |
| DIV/SQRT cycles | 28.00 |
| P0 cycles | 11.00 |
| P1 cycles | 23.00 |
| P2 cycles | 23.00 |
| P3 cycles | 12.00 |
| P4 cycles | 26.00 |
| P5 cycles | 11.00 |
| P6 cycles | 12.00 |
| P7 cycles | 12.00 |
| P8 cycles | 12.00 |
| P9 cycles | 11.00 |
| P10 cycles | 23.00 |
| P11 cycles | 16.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | 28.76 - 349.89 |
| Stall cycles (UFS) | 0.91 - 322.04 |
| Nb insns | 115.00 |
| Nb uops | 200.00 |
| Nb loads | 20.00 |
| Nb stores | 3.00 |
| Nb stack references | 4.00 |
| FLOP/cycle | 1.68 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 24.00 |
| Nb FLOP div | 8.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 22.92 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 572.00 |
| Bytes stored | 192.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 81.48 |
| Vectorization ratio load | 88.89 |
| Vectorization ratio store | 100.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 71.43 |
| Vectorization ratio fma | 100.00 |
| Vectorization ratio div_sqrt | 100.00 |
| Vectorization ratio other | 79.49 |
| Vector-efficiency ratio all | 66.09 |
| Vector-efficiency ratio load | 84.72 |
| Vector-efficiency ratio store | 100.00 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 59.82 |
| Vector-efficiency ratio fma | 100.00 |
| Vector-efficiency ratio div_sqrt | 100.00 |
| Vector-efficiency ratio other | 61.54 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.36 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 1.11 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.19 |
| Bottlenecks | micro-operation queue |
| Function | advancePosition.extracted |
| Source | timestep.c:85-94 |
| Source loop unroll info | NA |
| Source loop unroll confidence level | NA |
| Unroll/vectorization loop type | NA |
| Unroll factor | NA |
| CQA cycles | 33.33 |
| CQA cycles if no scalar integer | 24.50 |
| CQA cycles if FP arith vectorized | 33.33 |
| CQA cycles if fully vectorized | 29.98 |
| Front-end cycles | 33.33 |
| DIV/SQRT cycles | 28.00 |
| P0 cycles | 11.00 |
| P1 cycles | 23.00 |
| P2 cycles | 23.00 |
| P3 cycles | 12.00 |
| P4 cycles | 26.00 |
| P5 cycles | 11.00 |
| P6 cycles | 12.00 |
| P7 cycles | 12.00 |
| P8 cycles | 12.00 |
| P9 cycles | 11.00 |
| P10 cycles | 23.00 |
| P11 cycles | 16.00 |
| Inter-iter dependencies cycles | NA |
| FE+BE cycles (UFS) | 28.76 - 349.89 |
| Stall cycles (UFS) | 0.91 - 322.04 |
| Nb insns | 115.00 |
| Nb uops | 200.00 |
| Nb loads | 20.00 |
| Nb stores | 3.00 |
| Nb stack references | 4.00 |
| FLOP/cycle | 1.68 |
| Nb FLOP add-sub | 0.00 |
| Nb FLOP mul | 0.00 |
| Nb FLOP fma | 24.00 |
| Nb FLOP div | 8.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 0.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 22.92 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 572.00 |
| Bytes stored | 192.00 |
| Stride 0 | NA |
| Stride 1 | NA |
| Stride n | NA |
| Stride unknown | NA |
| Stride indirect | NA |
| Vectorization ratio all | 81.48 |
| Vectorization ratio load | 88.89 |
| Vectorization ratio store | 100.00 |
| Vectorization ratio mul | NA |
| Vectorization ratio add_sub | 71.43 |
| Vectorization ratio fma | 100.00 |
| Vectorization ratio div_sqrt | 100.00 |
| Vectorization ratio other | 79.49 |
| Vector-efficiency ratio all | 66.09 |
| Vector-efficiency ratio load | 84.72 |
| Vector-efficiency ratio store | 100.00 |
| Vector-efficiency ratio mul | NA |
| Vector-efficiency ratio add_sub | 59.82 |
| Vector-efficiency ratio fma | 100.00 |
| Vector-efficiency ratio div_sqrt | 100.00 |
| Vector-efficiency ratio other | 61.54 |
| Path / |
| Function | advancePosition.extracted |
| Source file and lines | timestep.c:85-94 |
| Module | exec |
| nb instructions | 115 |
| nb uops | 200 |
| loop length | 586 |
| used x86 registers | 15 |
| used mmx registers | 0 |
| used xmm registers | 2 |
| used ymm registers | 5 |
| used zmm registers | 13 |
| nb stack references | 4 |
| micro-operation queue | 33.33 cycles |
| front end | 33.33 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 28.00 | 11.00 | 23.00 | 23.00 | 12.00 | 26.00 | 11.00 | 12.00 | 12.00 | 12.00 | 11.00 | 23.00 |
| cycles | 28.00 | 11.00 | 23.00 | 23.00 | 12.00 | 26.00 | 11.00 | 12.00 | 12.00 | 12.00 | 11.00 | 23.00 |
| Cycles executing div or sqrt instructions | 16.00 |
| FE+BE cycles | 28.76-349.89 |
| Stall cycles | 0.91-322.04 |
| ROB full (events) | 0.32-330.93 |
| RS full (events) | 1.95-0.19 |
| Front-end | 33.33 |
| Dispatch | 28.00 |
| DIV/SQRT | 16.00 |
| Overall L1 | 33.33 |
| all | 60% |
| load | 50% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 71% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 52% |
| all | 100% |
| load | 100% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | 100% |
| div/sqrt | 100% |
| other | 100% |
| all | 81% |
| load | 88% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 71% |
| fma | 100% |
| div/sqrt | 100% |
| other | 79% |
| all | 44% |
| load | 31% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 59% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 38% |
| all | 84% |
| load | 100% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | 100% |
| div/sqrt | 100% |
| other | 79% |
| all | 66% |
| load | 84% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 59% |
| fma | 100% |
| div/sqrt | 100% |
| other | 61% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ADD $0x40,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| CMP -0x40(%RBP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
| LEA 0x1(%RDI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| JE 411590 <advancePosition.extracted+0x70> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| LEA (%RDI,%RDX,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV (%RAX,%RCX,4),%R15D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| TEST %R15D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
| JLE 411640 <advancePosition.extracted+0x120> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| MOV %ESI,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| LEA (,%RCX,4),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA (%RDI,%RDX,1),%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
| SAL $0x6,%R12D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
| MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV 0x20(%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV 0x28(%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV 0x10(%RDX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV 0x18(%RDX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV 0x20(%RDX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| LEA -0x1(%R15),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
| MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
| ADD %R12,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
| LEA (%RDX,%RDX,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA 0x10(%R9,%RDX,1),%RBX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| LEA (,%R12,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA (%R14,%R14,2),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA (%R8,%R14,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| CMP %RAX,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| JB 411740 <advancePosition.extracted+0x220> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| ADD %R9,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| LEA 0x10(%R8,%RDX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| CMP %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| JB 411740 <advancePosition.extracted+0x220> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| LEA (%RCX,%RCX,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA 0x10(,%RAX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| ADD %R13,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| JMP 411640 <advancePosition.extracted+0x120> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
| MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| VPBROADCASTQ %R12,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPBROADCASTD %R15D,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| AND $-0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
| JE 411880 <advancePosition.extracted+0x360> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| LEA -0x1(%R14),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
| VPADDQ %ZMM4,%ZMM13,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
| ADD %R11,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| CMP %R14D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| JE 411640 <advancePosition.extracted+0x120> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| JMP 411887 <advancePosition.extracted+0x367> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
| XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| VPBROADCASTD %R14D,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPSUBD %YMM15,%YMM14,%YMM14 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
| VPCMPNLEUD %YMM2,%YMM14,%K0 | |||||||||||||||
| KORTESTB %K0,%K0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| JE 411640 <advancePosition.extracted+0x120> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| VPCMPNLEUD %YMM3,%YMM14,%K1 | |||||||||||||||
| MOVSXD %R14D,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
| ADD %RAX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| VMOVDQU32 (%R11,%R12,4),%YMM14{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMOVDQA32 %YMM14,%YMM12{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VPMOVSXDQ %YMM12,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPSLLQ $0x4,%ZMM14,%ZMM14 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
| VXORPD %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD 0x8(%R10,%ZMM14,1),%ZMM16{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VPADDD %YMM3,%YMM15,%YMM14 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VPMOVSXDQ %YMM14,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPADDQ %ZMM14,%ZMM13,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
| VMOVAPD %ZMM16,%ZMM11{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VDIVPD %ZMM11,%ZMM1,%ZMM14 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 22-24 | 16 |
| VPSLLQ $0x3,%ZMM13,%ZMM15 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
| VPSLLQ $0x4,%ZMM13,%ZMM13 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
| VPADDQ %ZMM15,%ZMM13,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
| VPXOR %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD (%R9,%ZMM13,1),%ZMM15{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VXORPD %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD (%R8,%ZMM13,1),%ZMM16{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VMOVAPD %ZMM15,%ZMM10{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %ZMM16,%ZMM9{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %ZMM10,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VFMADD213PD %ZMM9,%ZMM14,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VSCATTERQPD %ZMM15,(%R8,%ZMM13,1){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
| VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD 0x8(%R9,%ZMM13,1),%ZMM15{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VXORPD %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD 0x8(%R8,%ZMM13,1),%ZMM16{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VMOVAPD %ZMM15,%ZMM8{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %ZMM16,%ZMM7{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %ZMM8,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VFMADD213PD %ZMM7,%ZMM14,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VSCATTERQPD %ZMM15,0x8(%R8,%ZMM13,1){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
| VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD 0x10(%R9,%ZMM13,1),%ZMM15{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VXORPD %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD 0x10(%R8,%ZMM13,1),%ZMM16{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VMOVAPD %ZMM15,%ZMM6{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %ZMM16,%ZMM5{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VFMADD213PD %ZMM5,%ZMM6,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VSCATTERQPD %ZMM14,0x10(%R8,%ZMM13,1){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
| JMP 411640 <advancePosition.extracted+0x120> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
| Function | advancePosition.extracted |
| Source file and lines | timestep.c:85-94 |
| Module | exec |
| nb instructions | 115 |
| nb uops | 200 |
| loop length | 586 |
| used x86 registers | 15 |
| used mmx registers | 0 |
| used xmm registers | 2 |
| used ymm registers | 5 |
| used zmm registers | 13 |
| nb stack references | 4 |
| micro-operation queue | 33.33 cycles |
| front end | 33.33 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 28.00 | 11.00 | 23.00 | 23.00 | 12.00 | 26.00 | 11.00 | 12.00 | 12.00 | 12.00 | 11.00 | 23.00 |
| cycles | 28.00 | 11.00 | 23.00 | 23.00 | 12.00 | 26.00 | 11.00 | 12.00 | 12.00 | 12.00 | 11.00 | 23.00 |
| Cycles executing div or sqrt instructions | 16.00 |
| FE+BE cycles | 28.76-349.89 |
| Stall cycles | 0.91-322.04 |
| ROB full (events) | 0.32-330.93 |
| RS full (events) | 1.95-0.19 |
| Front-end | 33.33 |
| Dispatch | 28.00 |
| DIV/SQRT | 16.00 |
| Overall L1 | 33.33 |
| all | 60% |
| load | 50% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 71% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 52% |
| all | 100% |
| load | 100% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | 100% |
| div/sqrt | 100% |
| other | 100% |
| all | 81% |
| load | 88% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 71% |
| fma | 100% |
| div/sqrt | 100% |
| other | 79% |
| all | 44% |
| load | 31% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 59% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 38% |
| all | 84% |
| load | 100% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | 100% |
| div/sqrt | 100% |
| other | 79% |
| all | 66% |
| load | 84% |
| store | 100% |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 59% |
| fma | 100% |
| div/sqrt | 100% |
| other | 61% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ADD $0x40,%ESI | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| CMP -0x40(%RBP),%RDI | 1 | 0.20 | 0.20 | 0.33 | 0.33 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.33 | 1 | 0.33 |
| LEA 0x1(%RDI),%RDI | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| JE 411590 <advancePosition.extracted+0x70> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| LEA (%RDI,%RDX,1),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| MOV -0x60(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV (%RAX,%RCX,4),%R15D | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| TEST %R15D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
| JLE 411640 <advancePosition.extracted+0x120> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| MOV %ESI,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| LEA (,%RCX,4),%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA (%RDI,%RDX,1),%R12D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
| SAL $0x6,%R12D | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
| MOV -0x58(%RBP),%RAX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV 0x20(%RAX),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV 0x28(%RAX),%R10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV 0x10(%RDX),%R11 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV 0x18(%RDX),%R8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV 0x20(%RDX),%R9 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| LEA -0x1(%R15),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
| MOVSXD %EDX,%RDX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
| ADD %R12,%RDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| SAL $0x3,%RDX | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0-2 | 0.50 |
| LEA (%RDX,%RDX,2),%RDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA 0x10(%R9,%RDX,1),%RBX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| LEA (,%R12,8),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA (%R14,%R14,2),%R14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA (%R8,%R14,1),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| CMP %RAX,%RBX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| JB 411740 <advancePosition.extracted+0x220> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| ADD %R9,%R14 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| LEA 0x10(%R8,%RDX,1),%RAX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| CMP %R14,%RAX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| JB 411740 <advancePosition.extracted+0x220> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| LEA (%RCX,%RCX,2),%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| LEA 0x10(,%RAX,8),%RCX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| ADD %R13,%R11 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| XOR %EDX,%EDX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| NOPW (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| JMP 411640 <advancePosition.extracted+0x120> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
| MOV %R15D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| VPBROADCASTQ %R12,%ZMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPBROADCASTD %R15D,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| AND $-0x8,%R14D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
| JE 411880 <advancePosition.extracted+0x360> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| LEA -0x1(%R14),%EDX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1-2 | 0.20 |
| VPADDQ %ZMM4,%ZMM13,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
| ADD %R11,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| XOR %ECX,%ECX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| NOPL (%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| CMP %R14D,%R15D | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| JE 411640 <advancePosition.extracted+0x120> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| JMP 411887 <advancePosition.extracted+0x367> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.84 |
| XOR %R14D,%R14D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| MOV -0x38(%RBP),%RDX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| VPBROADCASTD %R14D,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPSUBD %YMM15,%YMM14,%YMM14 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.33 |
| VPCMPNLEUD %YMM2,%YMM14,%K0 | |||||||||||||||
| KORTESTB %K0,%K0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| JE 411640 <advancePosition.extracted+0x120> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| VPCMPNLEUD %YMM3,%YMM14,%K1 | |||||||||||||||
| MOVSXD %R14D,%RAX | 1 | 0 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0.33 | 0 | 1 | 0.33 |
| ADD %RAX,%R12 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| VMOVDQU32 (%R11,%R12,4),%YMM14{%K1}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMOVDQA32 %YMM14,%YMM12{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VPMOVSXDQ %YMM12,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPSLLQ $0x4,%ZMM14,%ZMM14 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
| VXORPD %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD 0x8(%R10,%ZMM14,1),%ZMM16{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VPADDD %YMM3,%YMM15,%YMM14 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VPMOVSXDQ %YMM14,%ZMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPADDQ %ZMM14,%ZMM13,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
| VMOVAPD %ZMM16,%ZMM11{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VDIVPD %ZMM11,%ZMM1,%ZMM14 | 3 | 2.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 22-24 | 16 |
| VPSLLQ $0x3,%ZMM13,%ZMM15 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
| VPSLLQ $0x4,%ZMM13,%ZMM13 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2-4 | 1 |
| VPADDQ %ZMM15,%ZMM13,%ZMM13 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.50 |
| VPXOR %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD (%R9,%ZMM13,1),%ZMM15{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VXORPD %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD (%R8,%ZMM13,1),%ZMM16{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VMOVAPD %ZMM15,%ZMM10{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %ZMM16,%ZMM9{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %ZMM10,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VFMADD213PD %ZMM9,%ZMM14,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VSCATTERQPD %ZMM15,(%R8,%ZMM13,1){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
| VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD 0x8(%R9,%ZMM13,1),%ZMM15{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VXORPD %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD 0x8(%R8,%ZMM13,1),%ZMM16{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VMOVAPD %ZMM15,%ZMM8{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %ZMM16,%ZMM7{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %ZMM8,%ZMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VFMADD213PD %ZMM7,%ZMM14,%ZMM15 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VSCATTERQPD %ZMM15,0x8(%R8,%ZMM13,1){%K2} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
| VXORPD %XMM15,%XMM15,%XMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD 0x10(%R9,%ZMM13,1),%ZMM15{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VXORPD %XMM16,%XMM16,%XMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| KMOVQ %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| VGATHERQPD 0x10(%R8,%ZMM13,1),%ZMM16{%K2} | 5 | 1 | 0 | 2.67 | 2.67 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2.67 | 0-29 | 2.67 |
| VMOVAPD %ZMM15,%ZMM6{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %ZMM16,%ZMM5{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VFMADD213PD %ZMM5,%ZMM6,%ZMM14 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VSCATTERQPD %ZMM14,0x10(%R8,%ZMM13,1){%K1} | 20 | 2.20 | 0.20 | 0 | 0 | 4 | 0.20 | 0.20 | 4 | 4 | 4 | 0.20 | 0 | 2-12 | 7 |
| JMP 411640 <advancePosition.extracted+0x120> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
