| Loop Id: 1028 | Module: exec | Source: viscosity_kernel.f90:53-89 | Coverage: 2.2% |
|---|
| Loop Id: 1028 | Module: exec | Source: viscosity_kernel.f90:53-89 | Coverage: 2.2% |
|---|
0x46b760 VCMPPD $0x1,%YMM13,%YMM22,%K3 |
0x46b767 VBROADCASTSD 0xa6878(%RIP),%YMM8 [9] |
0x46b770 VBLENDMPD %YMM8,%YMM18,%YMM31{%K3} |
0x46b776 VBROADCASTSD 0x89a80(%RIP),%YMM29 [9] |
0x46b780 VANDPD %YMM29,%YMM22,%YMM1 |
0x46b786 VMAXPD %YMM1,%YMM14,%YMM1 |
0x46b78a VMULPD %YMM31,%YMM1,%YMM1 |
0x46b790 VCMPPD $0x1,%YMM13,%YMM1,%K3 |
0x46b797 VBLENDMPD %YMM8,%YMM18,%YMM25{%K3} |
0x46b79d VANDPD %YMM29,%YMM21,%YMM13 |
0x46b7a3 VMAXPD %YMM13,%YMM14,%YMM13 |
0x46b7a8 VMULPD %YMM25,%YMM13,%YMM13 |
0x46b7ae VMULPD %YMM1,%YMM1,%YMM20 |
0x46b7b4 VFMADD231PD %YMM13,%YMM13,%YMM20 |
0x46b7ba VXORPD %XMM28,%XMM28,%XMM28 |
0x46b7c0 VSQRTPD %YMM20,%YMM23 |
0x46b7c6 VMULPD %YMM2,%YMM23,%YMM2 |
0x46b7cc VDIVPD %YMM1,%YMM2,%YMM1 |
0x46b7d0 VANDPD %YMM29,%YMM1,%YMM22 |
0x46b7d6 VMULPD %YMM24,%YMM23,%YMM1 |
0x46b7dc VDIVPD %YMM13,%YMM1,%YMM1 |
0x46b7e1 VANDPD %YMM29,%YMM1,%YMM21 |
0x46b7e7 VCMPPD $0x2,%YMM21,%YMM22,%K3 |
0x46b7ee VBLENDMPD %YMM22,%YMM21,%YMM30{%K3} |
0x46b7f4 VMULPD %YMM30,%YMM30,%YMM29 |
0x46b7fa VMULPD %YMM10,%YMM10,%YMM1 |
0x46b7ff IMUL %R8,%RCX |
0x46b803 ADD %R9,%RCX |
0x46b806 VMOVUPD (%RCX,%R13,8),%YMM2{%K2}{z} [5] |
0x46b80d VADDPD %YMM29,%YMM29,%YMM10 |
0x46b813 VMULPD %YMM2,%YMM1,%YMM1 |
0x46b817 VMULPD %YMM1,%YMM10,%YMM1 |
0x46b81b VPBROADCASTQ %R13,%YMM2 |
0x46b821 VPADDQ 0x897f7(%RIP),%YMM2,%YMM2 [9] |
0x46b829 VPBLENDMQ %YMM5,%YMM2,%YMM5{%K1} |
0x46b82f VMOVAPD %YMM4,%YMM31{%K1} |
0x46b835 VPBLENDMQ %YMM9,%YMM2,%YMM9{%K1} |
0x46b83b VMOVAPD %YMM17,%YMM25{%K1} |
0x46b841 VPBLENDMQ %YMM6,%YMM2,%YMM6{%K1} |
0x46b847 VMOVAPD %YMM16,%YMM23{%K1} |
0x46b84d VPBLENDMQ %YMM3,%YMM2,%YMM3{%K1} |
0x46b853 VMOVAPD %YMM15,%YMM22{%K1} |
0x46b859 VPBLENDMQ %YMM11,%YMM2,%YMM11{%K1} |
0x46b85f VMOVAPD %YMM19,%YMM21{%K1} |
0x46b865 VPBLENDMQ %YMM12,%YMM2,%YMM12{%K1} |
0x46b86b VMOVAPD %YMM0,%YMM30{%K1} |
0x46b871 VMOVDQA64 %YMM7,%YMM2{%K1} |
0x46b877 VMOVAPD %YMM27,%YMM29{%K1} |
0x46b87d VMOVAPD %YMM28,%YMM1{%K1} |
0x46b883 VMOVUPD %YMM1,(%R12,%R13,8) [1] |
0x46b889 ADD $0x4,%R13 |
0x46b88d VMOVDQA %YMM2,%YMM7 |
0x46b891 VMOVAPD %YMM29,%YMM27 |
0x46b897 VMOVAPD %YMM30,%YMM0 |
0x46b89d VMOVAPD %YMM21,%YMM19 |
0x46b8a3 VMOVAPD %YMM22,%YMM15 |
0x46b8a9 VMOVAPD %YMM23,%YMM16 |
0x46b8af VMOVAPD %YMM25,%YMM17 |
0x46b8b5 VMOVAPD %YMM31,%YMM4 |
0x46b8bb CMP %RDI,%R13 |
0x46b8be VMOVAPD %YMM14,%YMM8 |
0x46b8c3 JAE 46ba60 |
0x46b8c9 VMOVUPD -0x8(%R10,%R13,8),%YMM10 [7] |
0x46b8d0 VMOVUPD (%R10,%R13,8),%YMM21 [7] |
0x46b8d7 VMOVUPD -0x8(%R14,%R13,8),%YMM22 [4] |
0x46b8e2 VMOVUPD (%R14,%R13,8),%YMM23 [4] |
0x46b8e9 VADDPD %YMM21,%YMM23,%YMM2 |
0x46b8ef VADDPD %YMM22,%YMM10,%YMM25 |
0x46b8f5 VMOVUPD -0x8(%RDX,%R13,8),%YMM29 [6] |
0x46b900 VMOVUPD (%RDX,%R13,8),%YMM30 [6] |
0x46b907 VSUBPD %YMM25,%YMM2,%YMM25 |
0x46b90d VMOVUPD -0x8(%RSI,%R13,8),%YMM31 [8] |
0x46b918 VMOVUPD (%RSI,%R13,8),%YMM20 [8] |
0x46b91f VADDPD %YMM29,%YMM30,%YMM2 |
0x46b925 VADDPD %YMM20,%YMM31,%YMM1 |
0x46b92b VSUBPD %YMM1,%YMM2,%YMM1 |
0x46b92f MOV 0x158(%RSP),%RCX [11] |
0x46b937 VMOVUPD -0x8(%RCX,%R13,8),%YMM2 [12] |
0x46b93e VMULPD %YMM25,%YMM2,%YMM13 |
0x46b944 VFMADD231PD %YMM1,%YMM24,%YMM13 |
0x46b94a VADDPD %YMM10,%YMM21,%YMM10 |
0x46b950 VSUBPD %YMM10,%YMM23,%YMM10 |
0x46b956 VADDPD %YMM22,%YMM10,%YMM10 |
0x46b95c VBROADCASTSD 0x8a69b(%RIP),%YMM14 [9] |
0x46b965 VMULPD %YMM14,%YMM10,%YMM10 |
0x46b96a VADDPD %YMM31,%YMM29,%YMM21 |
0x46b970 VSUBPD %YMM21,%YMM30,%YMM21 |
0x46b976 VADDPD %YMM20,%YMM21,%YMM20 |
0x46b97c VMULPD %YMM14,%YMM20,%YMM20 |
0x46b982 VDIVPD %YMM2,%YMM18,%YMM23 |
0x46b988 VMULPD %YMM23,%YMM20,%YMM20 |
0x46b98e VMOVUPD (%RAX,%R13,8),%YMM21 [3] |
0x46b995 VFMADD231PD %YMM10,%YMM26,%YMM20 |
0x46b99b VSUBPD -0x10(%RAX,%R13,8),%YMM21,%YMM10 [3] |
0x46b9a6 VADDPD (%RCX,%R13,8),%YMM2,%YMM21 [12] |
0x46b9ad VDIVPD %YMM21,%YMM10,%YMM22 |
0x46b9b3 VMOVUPD (%R15,%R13,8),%YMM10 [2] |
0x46b9b9 VSUBPD (%R11,%R13,8),%YMM10,%YMM10 [14] |
0x46b9bf VMULPD 0x180(%RSP),%YMM10,%YMM21 [11] |
0x46b9c7 VMULPD %YMM22,%YMM22,%YMM10 |
0x46b9cd VMULPD %YMM21,%YMM21,%YMM29 |
0x46b9d3 VMULPD %YMM14,%YMM25,%YMM25 |
0x46b9d9 VMULPD %YMM23,%YMM25,%YMM23 |
0x46b9df VMULPD %YMM10,%YMM23,%YMM23 |
0x46b9e5 VMULPD %YMM1,%YMM14,%YMM1 |
0x46b9e9 VMULPD %YMM29,%YMM1,%YMM1 |
0x46b9ef VMULPD %YMM20,%YMM22,%YMM20 |
0x46b9f5 VFMADD213PD %YMM23,%YMM21,%YMM20 |
0x46b9fb VFMADD231PD %YMM1,%YMM26,%YMM20 |
0x46ba01 VADDPD %YMM10,%YMM29,%YMM1 |
0x46ba07 VMAXPD %YMM8,%YMM1,%YMM1 |
0x46ba0c VDIVPD %YMM1,%YMM20,%YMM10 |
0x46ba12 VCMPPD $0x1,%YMM10,%YMM28,%K1 |
0x46ba19 VCMPPD $0x2,%YMM13,%YMM28,%K0 |
0x46ba20 VCMPPD $0x6,%YMM13,%YMM28,%K1{%K1} |
0x46ba27 KORW %K1,%K0,%K1 |
0x46ba2b KNOTW %K1,%K2 |
0x46ba2f KMOVD %K2,%EBX |
0x46ba33 TEST $0xf,%BL |
0x46ba36 VMOVAPD %YMM8,%YMM14 |
0x46ba3b VXORPD %XMM13,%XMM13,%XMM13 |
0x46ba40 JE 46b760 |
0x46ba46 MOV 0x78(%RBP),%RCX [13] |
0x46ba4a MOV (%RCX),%RCX [10] |
0x46ba4d JMP 46b760 |
/home/eoseret/qaas_runs_CPU_9468/171-152-3172/intel/CloverLeafFC/build/CloverLeafFC/CloverLeaf_ref/kernels/viscosity_kernel.f90: 53 - 89 |
-------------------------------------------------------------------------------- |
53: DO k=y_min,y_max |
54: !$OMP SIMD |
55: DO j=x_min,x_max |
56: ugrad=(xvel0(j+1,k )+xvel0(j+1,k+1))-(xvel0(j ,k )+xvel0(j ,k+1)) |
57: |
58: vgrad=(yvel0(j ,k+1)+yvel0(j+1,k+1))-(yvel0(j ,k )+yvel0(j+1,k )) |
59: |
60: div = (celldx(j)*(ugrad)+ celldy(k)*(vgrad)) |
61: |
62: strain2 = 0.5_8*(xvel0(j, k+1) + xvel0(j+1,k+1)-xvel0(j ,k )-xvel0(j+1,k ))/celldy(k) & |
63: + 0.5_8*(yvel0(j+1,k ) + yvel0(j+1,k+1)-yvel0(j ,k )-yvel0(j ,k+1))/celldx(j) |
64: |
65: pgradx=(pressure(j+1,k)-pressure(j-1,k))/(celldx(j)+celldx(j+1)) |
66: pgrady=(pressure(j,k+1)-pressure(j,k-1))/(celldy(k)+celldy(k+1)) |
67: |
68: pgradx2 = pgradx*pgradx |
69: pgrady2 = pgrady*pgrady |
70: |
71: limiter = ((0.5_8*(ugrad)/celldx(j))*pgradx2+(0.5_8*(vgrad)/celldy(k))*pgrady2+strain2*pgradx*pgrady) & |
72: /MAX(pgradx2+pgrady2,1.0e-16_8) |
73: |
74: IF ((limiter.GT.0.0).OR.(div.GE.0.0))THEN |
75: viscosity(j,k) = 0.0 |
76: ELSE |
77: dirx=1.0_8 |
78: IF(pgradx.LT.0.0) dirx=-1.0_8 |
79: pgradx = dirx*MAX(1.0e-16_8,ABS(pgradx)) |
80: diry=1.0_8 |
81: IF(pgradx.LT.0.0) diry=-1.0_8 |
82: pgrady = diry*MAX(1.0e-16_8,ABS(pgrady)) |
83: pgrad = SQRT(pgradx**2+pgrady**2) |
84: xgrad = ABS(celldx(j)*pgrad/pgradx) |
85: ygrad = ABS(celldy(k)*pgrad/pgrady) |
86: grad = MIN(xgrad,ygrad) |
87: grad2 = grad*grad |
88: |
89: viscosity(j,k)=2.0_8*density0(j,k)*grad2*limiter*limiter |
| Path / |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.00 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 1.00 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.88 |
| Bottlenecks | |
| Function | viscosity_kernel_.DIR.OMP.PARALLEL.2 |
| Source | viscosity_kernel.f90:53-89 |
| Source loop unroll info | unrolled by 4 |
| Source loop unroll confidence level | high |
| Unroll/vectorization loop type | main |
| Unroll factor | 4 |
| CQA cycles | 49.00 |
| CQA cycles if no scalar integer | 49.00 |
| CQA cycles if FP arith vectorized | 49.00 |
| CQA cycles if fully vectorized | 49.00 |
| Front-end cycles | 21.08 |
| DIV/SQRT cycles | 26.00 |
| P0 cycles | 26.00 |
| P1 cycles | 7.33 |
| P2 cycles | 7.33 |
| P3 cycles | 0.50 |
| P4 cycles | 26.00 |
| P5 cycles | 2.40 |
| P6 cycles | 0.50 |
| P7 cycles | 0.50 |
| P8 cycles | 0.50 |
| P9 cycles | 2.60 |
| P10 cycles | 7.33 |
| P11 cycles | 49.00 |
| Inter-iter dependencies cycles | 1 - 2 |
| FE+BE cycles (UFS) | 50.35 - 49.85 |
| Stall cycles (UFS) | 28.47 - 27.97 |
| Nb insns | 123.50 |
| Nb uops | 123.50 |
| Nb loads | 22.00 |
| Nb stores | 1.00 |
| Nb stack references | 2.50 |
| FLOP/cycle | 4.49 |
| Nb FLOP add-sub | 68.00 |
| Nb FLOP mul | 88.00 |
| Nb FLOP fma | 20.00 |
| Nb FLOP div | 20.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 4.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 12.57 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 584.00 |
| Bytes stored | 32.00 |
| Stride 0 | 2.50 |
| Stride 1 | 3.00 |
| Stride n | 5.00 |
| Stride unknown | 1.50 |
| Stride indirect | 0.00 |
| Vectorization ratio all | 95.54 |
| Vectorization ratio load | 85.00 |
| Vectorization ratio store | 100.00 |
| Vectorization ratio mul | 100.00 |
| Vectorization ratio add_sub | 100.00 |
| Vectorization ratio fma | 100.00 |
| Vectorization ratio div_sqrt | 100.00 |
| Vectorization ratio other | 89.58 |
| Vector-efficiency ratio all | 47.88 |
| Vector-efficiency ratio load | 44.38 |
| Vector-efficiency ratio store | 50.00 |
| Vector-efficiency ratio mul | 50.00 |
| Vector-efficiency ratio add_sub | 50.00 |
| Vector-efficiency ratio fma | 50.00 |
| Vector-efficiency ratio div_sqrt | 50.00 |
| Vector-efficiency ratio other | 45.05 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.00 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 1.00 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.88 |
| Bottlenecks | P0, |
| Function | viscosity_kernel_.DIR.OMP.PARALLEL.2 |
| Source | viscosity_kernel.f90:53-89 |
| Source loop unroll info | unrolled by 4 |
| Source loop unroll confidence level | high |
| Unroll/vectorization loop type | main |
| Unroll factor | 4 |
| CQA cycles | 49.00 |
| CQA cycles if no scalar integer | 49.00 |
| CQA cycles if FP arith vectorized | 49.00 |
| CQA cycles if fully vectorized | 49.00 |
| Front-end cycles | 21.33 |
| DIV/SQRT cycles | 26.00 |
| P0 cycles | 26.00 |
| P1 cycles | 7.67 |
| P2 cycles | 7.67 |
| P3 cycles | 0.50 |
| P4 cycles | 26.00 |
| P5 cycles | 2.40 |
| P6 cycles | 0.50 |
| P7 cycles | 0.50 |
| P8 cycles | 0.50 |
| P9 cycles | 2.60 |
| P10 cycles | 7.67 |
| P11 cycles | 49.00 |
| Inter-iter dependencies cycles | 1 - 2 |
| FE+BE cycles (UFS) | 50.33 - 49.86 |
| Stall cycles (UFS) | 28.18 - 27.71 |
| Nb insns | 125.00 |
| Nb uops | 125.00 |
| Nb loads | 23.00 |
| Nb stores | 1.00 |
| Nb stack references | 3.00 |
| FLOP/cycle | 4.49 |
| Nb FLOP add-sub | 68.00 |
| Nb FLOP mul | 88.00 |
| Nb FLOP fma | 20.00 |
| Nb FLOP div | 20.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 4.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 12.73 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 592.00 |
| Bytes stored | 32.00 |
| Stride 0 | 3.00 |
| Stride 1 | 3.00 |
| Stride n | 5.00 |
| Stride unknown | 2.00 |
| Stride indirect | 0.00 |
| Vectorization ratio all | 95.54 |
| Vectorization ratio load | 85.00 |
| Vectorization ratio store | 100.00 |
| Vectorization ratio mul | 100.00 |
| Vectorization ratio add_sub | 100.00 |
| Vectorization ratio fma | 100.00 |
| Vectorization ratio div_sqrt | 100.00 |
| Vectorization ratio other | 89.58 |
| Vector-efficiency ratio all | 47.88 |
| Vector-efficiency ratio load | 44.38 |
| Vector-efficiency ratio store | 50.00 |
| Vector-efficiency ratio mul | 50.00 |
| Vector-efficiency ratio add_sub | 50.00 |
| Vector-efficiency ratio fma | 50.00 |
| Vector-efficiency ratio div_sqrt | 50.00 |
| Vector-efficiency ratio other | 45.05 |
| Metric | Value |
|---|---|
| CQA speedup if no scalar integer | 1.00 |
| CQA speedup if FP arith vectorized | 1.00 |
| CQA speedup if fully vectorized | 1.00 |
| CQA speedup if no inter-iteration dependency | NA |
| CQA speedup if next bottleneck killed | 1.88 |
| Bottlenecks | P0, |
| Function | viscosity_kernel_.DIR.OMP.PARALLEL.2 |
| Source | viscosity_kernel.f90:53-89 |
| Source loop unroll info | unrolled by 4 |
| Source loop unroll confidence level | high |
| Unroll/vectorization loop type | main |
| Unroll factor | 4 |
| CQA cycles | 49.00 |
| CQA cycles if no scalar integer | 49.00 |
| CQA cycles if FP arith vectorized | 49.00 |
| CQA cycles if fully vectorized | 49.00 |
| Front-end cycles | 20.83 |
| DIV/SQRT cycles | 26.00 |
| P0 cycles | 26.00 |
| P1 cycles | 7.00 |
| P2 cycles | 7.00 |
| P3 cycles | 0.50 |
| P4 cycles | 26.00 |
| P5 cycles | 2.40 |
| P6 cycles | 0.50 |
| P7 cycles | 0.50 |
| P8 cycles | 0.50 |
| P9 cycles | 2.60 |
| P10 cycles | 7.00 |
| P11 cycles | 49.00 |
| Inter-iter dependencies cycles | 1 - 2 |
| FE+BE cycles (UFS) | 50.37 - 49.85 |
| Stall cycles (UFS) | 28.75 - 28.23 |
| Nb insns | 122.00 |
| Nb uops | 122.00 |
| Nb loads | 21.00 |
| Nb stores | 1.00 |
| Nb stack references | 2.00 |
| FLOP/cycle | 4.49 |
| Nb FLOP add-sub | 68.00 |
| Nb FLOP mul | 88.00 |
| Nb FLOP fma | 20.00 |
| Nb FLOP div | 20.00 |
| Nb FLOP rcp | 0.00 |
| Nb FLOP sqrt | 4.00 |
| Nb FLOP rsqrt | 0.00 |
| Bytes/cycle | 12.41 |
| Bytes prefetched | 0.00 |
| Bytes loaded | 576.00 |
| Bytes stored | 32.00 |
| Stride 0 | 2.00 |
| Stride 1 | 3.00 |
| Stride n | 5.00 |
| Stride unknown | 1.00 |
| Stride indirect | 0.00 |
| Vectorization ratio all | 95.54 |
| Vectorization ratio load | 85.00 |
| Vectorization ratio store | 100.00 |
| Vectorization ratio mul | 100.00 |
| Vectorization ratio add_sub | 100.00 |
| Vectorization ratio fma | 100.00 |
| Vectorization ratio div_sqrt | 100.00 |
| Vectorization ratio other | 89.58 |
| Vector-efficiency ratio all | 47.88 |
| Vector-efficiency ratio load | 44.38 |
| Vector-efficiency ratio store | 50.00 |
| Vector-efficiency ratio mul | 50.00 |
| Vector-efficiency ratio add_sub | 50.00 |
| Vector-efficiency ratio fma | 50.00 |
| Vector-efficiency ratio div_sqrt | 50.00 |
| Vector-efficiency ratio other | 45.05 |
| Path / |
| Function | viscosity_kernel_.DIR.OMP.PARALLEL.2 |
| Source file and lines | viscosity_kernel.f90:53-89 |
| Module | exec |
| nb instructions | 123.50 |
| nb uops | 123.50 |
| loop length | 748 |
| used x86 registers | 15.50 |
| used mmx registers | 0 |
| used xmm registers | 2 |
| used ymm registers | 32 |
| used zmm registers | 0 |
| nb stack references | 2.50 |
| ADD-SUB / MUL ratio | 0.77 |
| micro-operation queue | 21.08 cycles |
| front end | 21.08 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 26.00 | 26.00 | 7.33 | 7.33 | 0.50 | 26.00 | 2.40 | 0.50 | 0.50 | 0.50 | 2.60 | 7.33 |
| cycles | 26.00 | 26.00 | 7.33 | 7.33 | 0.50 | 26.00 | 2.40 | 0.50 | 0.50 | 0.50 | 2.60 | 7.33 |
| Cycles executing div or sqrt instructions | 49.00 |
| Longest recurrence chain latency (RecMII) | 1.00-2.00 |
| FE+BE cycles | 50.35-49.86 |
| Stall cycles | 28.47-27.97 |
| ROB full (events) | 15.26-15.02 |
| PRF_FLOAT full (events) | 29.54-29.05 |
| Front-end | 21.08 |
| Dispatch | 26.00 |
| DIV/SQRT | 49.00 |
| Data deps. | 1.00-2.00 |
| Overall L1 | 49.00 |
| all | 81% |
| load | 100% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 80% |
| all | 97% |
| load | 84% |
| store | 100% |
| mul | 100% |
| add-sub | 100% |
| fma | 100% |
| div/sqrt | 100% |
| other | 92% |
| all | 95% |
| load | 85% |
| store | 100% |
| mul | 100% |
| add-sub | 100% |
| fma | 100% |
| div/sqrt | 100% |
| other | 89% |
| all | 43% |
| load | 50% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 50% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 42% |
| all | 48% |
| load | 44% |
| store | 50% |
| mul | 50% |
| add-sub | 50% |
| fma | 50% |
| div/sqrt | 50% |
| other | 45% |
| all | 47% |
| load | 44% |
| store | 50% |
| mul | 50% |
| add-sub | 50% |
| fma | 50% |
| div/sqrt | 50% |
| other | 45% |
| Function | viscosity_kernel_.DIR.OMP.PARALLEL.2 |
| Source file and lines | viscosity_kernel.f90:53-89 |
| Module | exec |
| nb instructions | 125 |
| nb uops | 125 |
| loop length | 754 |
| used x86 registers | 16 |
| used mmx registers | 0 |
| used xmm registers | 2 |
| used ymm registers | 32 |
| used zmm registers | 0 |
| nb stack references | 3 |
| ADD-SUB / MUL ratio | 0.77 |
| micro-operation queue | 21.33 cycles |
| front end | 21.33 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 26.00 | 26.00 | 7.67 | 7.67 | 0.50 | 26.00 | 2.40 | 0.50 | 0.50 | 0.50 | 2.60 | 7.67 |
| cycles | 26.00 | 26.00 | 7.67 | 7.67 | 0.50 | 26.00 | 2.40 | 0.50 | 0.50 | 0.50 | 2.60 | 7.67 |
| Cycles executing div or sqrt instructions | 49.00 |
| Longest recurrence chain latency (RecMII) | 1.00-2.00 |
| FE+BE cycles | 50.33-49.86 |
| Stall cycles | 28.18-27.71 |
| ROB full (events) | 30.51-30.03 |
| PRF_FLOAT full (events) | 27.51-27.06 |
| Front-end | 21.33 |
| Dispatch | 26.00 |
| DIV/SQRT | 49.00 |
| Data deps. | 1.00-2.00 |
| Overall L1 | 49.00 |
| all | 81% |
| load | 100% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 80% |
| all | 97% |
| load | 84% |
| store | 100% |
| mul | 100% |
| add-sub | 100% |
| fma | 100% |
| div/sqrt | 100% |
| other | 92% |
| all | 95% |
| load | 85% |
| store | 100% |
| mul | 100% |
| add-sub | 100% |
| fma | 100% |
| div/sqrt | 100% |
| other | 89% |
| all | 43% |
| load | 50% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 50% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 42% |
| all | 48% |
| load | 44% |
| store | 50% |
| mul | 50% |
| add-sub | 50% |
| fma | 50% |
| div/sqrt | 50% |
| other | 45% |
| all | 47% |
| load | 44% |
| store | 50% |
| mul | 50% |
| add-sub | 50% |
| fma | 50% |
| div/sqrt | 50% |
| other | 45% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VCMPPD $0x1,%YMM13,%YMM22,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VBROADCASTSD 0xa6878(%RIP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
| VBLENDMPD %YMM8,%YMM18,%YMM31{%K3} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VBROADCASTSD 0x89a80(%RIP),%YMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
| VANDPD %YMM29,%YMM22,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMAXPD %YMM1,%YMM14,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM31,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VCMPPD $0x1,%YMM13,%YMM1,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VBLENDMPD %YMM8,%YMM18,%YMM25{%K3} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VANDPD %YMM29,%YMM21,%YMM13 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMAXPD %YMM13,%YMM14,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM25,%YMM13,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM1,%YMM1,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VFMADD231PD %YMM13,%YMM13,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VXORPD %XMM28,%XMM28,%XMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VSQRTPD %YMM20,%YMM23 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 9 |
| VMULPD %YMM2,%YMM23,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VDIVPD %YMM1,%YMM2,%YMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
| VANDPD %YMM29,%YMM1,%YMM22 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMULPD %YMM24,%YMM23,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VDIVPD %YMM13,%YMM1,%YMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
| VANDPD %YMM29,%YMM1,%YMM21 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VCMPPD $0x2,%YMM21,%YMM22,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VBLENDMPD %YMM22,%YMM21,%YMM30{%K3} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMULPD %YMM30,%YMM30,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM10,%YMM10,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| IMUL %R8,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| ADD %R9,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| VMOVUPD (%RCX,%R13,8),%YMM2{%K2}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VADDPD %YMM29,%YMM29,%YMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VMULPD %YMM2,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM1,%YMM10,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VPBROADCASTQ %R13,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPADDQ 0x897f7(%RIP),%YMM2,%YMM2 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
| VPBLENDMQ %YMM5,%YMM2,%YMM5{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMOVAPD %YMM4,%YMM31{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VPBLENDMQ %YMM9,%YMM2,%YMM9{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMOVAPD %YMM17,%YMM25{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VPBLENDMQ %YMM6,%YMM2,%YMM6{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMOVAPD %YMM16,%YMM23{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VPBLENDMQ %YMM3,%YMM2,%YMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMOVAPD %YMM15,%YMM22{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VPBLENDMQ %YMM11,%YMM2,%YMM11{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMOVAPD %YMM19,%YMM21{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VPBLENDMQ %YMM12,%YMM2,%YMM12{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMOVAPD %YMM0,%YMM30{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVDQA64 %YMM7,%YMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM27,%YMM29{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM28,%YMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVUPD %YMM1,(%R12,%R13,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
| ADD $0x4,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| VMOVDQA %YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM29,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM30,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM21,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM22,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM23,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM25,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM31,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| CMP %RDI,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| VMOVAPD %YMM14,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| JAE 46ba60 <viscosity_kernel_module_mp_viscosity_kernel_.DIR.OMP.PARALLEL.2+0x840> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| VMOVUPD -0x8(%R10,%R13,8),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMOVUPD (%R10,%R13,8),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMOVUPD -0x8(%R14,%R13,8),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMOVUPD (%R14,%R13,8),%YMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VADDPD %YMM21,%YMM23,%YMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VADDPD %YMM22,%YMM10,%YMM25 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VMOVUPD -0x8(%RDX,%R13,8),%YMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMOVUPD (%RDX,%R13,8),%YMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VSUBPD %YMM25,%YMM2,%YMM25 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VMOVUPD -0x8(%RSI,%R13,8),%YMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMOVUPD (%RSI,%R13,8),%YMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VADDPD %YMM29,%YMM30,%YMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VADDPD %YMM20,%YMM31,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VSUBPD %YMM1,%YMM2,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| MOV 0x158(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| VMOVUPD -0x8(%RCX,%R13,8),%YMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMULPD %YMM25,%YMM2,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VFMADD231PD %YMM1,%YMM24,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VADDPD %YMM10,%YMM21,%YMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VSUBPD %YMM10,%YMM23,%YMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VADDPD %YMM22,%YMM10,%YMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VBROADCASTSD 0x8a69b(%RIP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
| VMULPD %YMM14,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VADDPD %YMM31,%YMM29,%YMM21 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VSUBPD %YMM21,%YMM30,%YMM21 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VADDPD %YMM20,%YMM21,%YMM20 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VMULPD %YMM14,%YMM20,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VDIVPD %YMM2,%YMM18,%YMM23 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
| VMULPD %YMM23,%YMM20,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMOVUPD (%RAX,%R13,8),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VFMADD231PD %YMM10,%YMM26,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VSUBPD -0x10(%RAX,%R13,8),%YMM21,%YMM10 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
| VADDPD (%RCX,%R13,8),%YMM2,%YMM21 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
| VDIVPD %YMM21,%YMM10,%YMM22 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
| VMOVUPD (%R15,%R13,8),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VSUBPD (%R11,%R13,8),%YMM10,%YMM10 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
| VMULPD 0x180(%RSP),%YMM10,%YMM21 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
| VMULPD %YMM22,%YMM22,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM21,%YMM21,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM14,%YMM25,%YMM25 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM23,%YMM25,%YMM23 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM10,%YMM23,%YMM23 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM1,%YMM14,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM29,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM20,%YMM22,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VFMADD213PD %YMM23,%YMM21,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VFMADD231PD %YMM1,%YMM26,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VADDPD %YMM10,%YMM29,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VMAXPD %YMM8,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VDIVPD %YMM1,%YMM20,%YMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
| VCMPPD $0x1,%YMM10,%YMM28,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VCMPPD $0x2,%YMM13,%YMM28,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VCMPPD $0x6,%YMM13,%YMM28,%K1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| KORW %K1,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
| KNOTW %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
| KMOVD %K2,%EBX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| TEST $0xf,%BL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
| VMOVAPD %YMM8,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| JE 46b760 <viscosity_kernel_module_mp_viscosity_kernel_.DIR.OMP.PARALLEL.2+0x540> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| MOV 0x78(%RBP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| MOV (%RCX),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| JMP 46b760 <viscosity_kernel_module_mp_viscosity_kernel_.DIR.OMP.PARALLEL.2+0x540> | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.08 |
| Function | viscosity_kernel_.DIR.OMP.PARALLEL.2 |
| Source file and lines | viscosity_kernel.f90:53-89 |
| Module | exec |
| nb instructions | 122 |
| nb uops | 122 |
| loop length | 742 |
| used x86 registers | 15 |
| used mmx registers | 0 |
| used xmm registers | 2 |
| used ymm registers | 32 |
| used zmm registers | 0 |
| nb stack references | 2 |
| ADD-SUB / MUL ratio | 0.77 |
| micro-operation queue | 20.83 cycles |
| front end | 20.83 cycles |
| P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| uops | 26.00 | 26.00 | 7.00 | 7.00 | 0.50 | 26.00 | 2.40 | 0.50 | 0.50 | 0.50 | 2.60 | 7.00 |
| cycles | 26.00 | 26.00 | 7.00 | 7.00 | 0.50 | 26.00 | 2.40 | 0.50 | 0.50 | 0.50 | 2.60 | 7.00 |
| Cycles executing div or sqrt instructions | 49.00 |
| Longest recurrence chain latency (RecMII) | 1.00-2.00 |
| FE+BE cycles | 50.37-49.85 |
| Stall cycles | 28.75-28.23 |
| PRF_FLOAT full (events) | 31.56-31.04 |
| Front-end | 20.83 |
| Dispatch | 26.00 |
| DIV/SQRT | 49.00 |
| Data deps. | 1.00-2.00 |
| Overall L1 | 49.00 |
| all | 81% |
| load | 100% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 100% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 80% |
| all | 97% |
| load | 84% |
| store | 100% |
| mul | 100% |
| add-sub | 100% |
| fma | 100% |
| div/sqrt | 100% |
| other | 92% |
| all | 95% |
| load | 85% |
| store | 100% |
| mul | 100% |
| add-sub | 100% |
| fma | 100% |
| div/sqrt | 100% |
| other | 89% |
| all | 43% |
| load | 50% |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | 50% |
| fma | NA (no fma vectorizable/vectorized instructions) |
| other | 42% |
| all | 48% |
| load | 44% |
| store | 50% |
| mul | 50% |
| add-sub | 50% |
| fma | 50% |
| div/sqrt | 50% |
| other | 45% |
| all | 47% |
| load | 44% |
| store | 50% |
| mul | 50% |
| add-sub | 50% |
| fma | 50% |
| div/sqrt | 50% |
| other | 45% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | P8 | P9 | P10 | P11 | Latency | Recip. throughput |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VCMPPD $0x1,%YMM13,%YMM22,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VBROADCASTSD 0xa6878(%RIP),%YMM8 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
| VBLENDMPD %YMM8,%YMM18,%YMM31{%K3} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VBROADCASTSD 0x89a80(%RIP),%YMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
| VANDPD %YMM29,%YMM22,%YMM1 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMAXPD %YMM1,%YMM14,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM31,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VCMPPD $0x1,%YMM13,%YMM1,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VBLENDMPD %YMM8,%YMM18,%YMM25{%K3} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VANDPD %YMM29,%YMM21,%YMM13 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMAXPD %YMM13,%YMM14,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM25,%YMM13,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM1,%YMM1,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VFMADD231PD %YMM13,%YMM13,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VXORPD %XMM28,%XMM28,%XMM28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| VSQRTPD %YMM20,%YMM23 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-19 | 9 |
| VMULPD %YMM2,%YMM23,%YMM2 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VDIVPD %YMM1,%YMM2,%YMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
| VANDPD %YMM29,%YMM1,%YMM22 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMULPD %YMM24,%YMM23,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VDIVPD %YMM13,%YMM1,%YMM1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
| VANDPD %YMM29,%YMM1,%YMM21 | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VCMPPD $0x2,%YMM21,%YMM22,%K3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VBLENDMPD %YMM22,%YMM21,%YMM30{%K3} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMULPD %YMM30,%YMM30,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM10,%YMM10,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| IMUL %R8,%RCX | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| ADD %R9,%RCX | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| VMOVUPD (%RCX,%R13,8),%YMM2{%K2}{z} | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VADDPD %YMM29,%YMM29,%YMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VMULPD %YMM2,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM1,%YMM10,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VPBROADCASTQ %R13,%YMM2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VPADDQ 0x897f7(%RIP),%YMM2,%YMM2 | 1 | 0.33 | 0.33 | 0.33 | 0.33 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.40 |
| VPBLENDMQ %YMM5,%YMM2,%YMM5{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMOVAPD %YMM4,%YMM31{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VPBLENDMQ %YMM9,%YMM2,%YMM9{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMOVAPD %YMM17,%YMM25{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VPBLENDMQ %YMM6,%YMM2,%YMM6{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMOVAPD %YMM16,%YMM23{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VPBLENDMQ %YMM3,%YMM2,%YMM3{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMOVAPD %YMM15,%YMM22{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VPBLENDMQ %YMM11,%YMM2,%YMM11{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMOVAPD %YMM19,%YMM21{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VPBLENDMQ %YMM12,%YMM2,%YMM12{%K1} | 1 | 0.33 | 0.33 | 0 | 0 | 0 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.33 |
| VMOVAPD %YMM0,%YMM30{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVDQA64 %YMM7,%YMM2{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM27,%YMM29{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM28,%YMM1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVUPD %YMM1,(%R12,%R13,8) | 1 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50 | 0.50 | 0.50 | 0 | 0 | 0-1 | 0.50 |
| ADD $0x4,%R13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.17 |
| VMOVDQA %YMM2,%YMM7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM29,%YMM27 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM30,%YMM0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM21,%YMM19 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM22,%YMM15 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM23,%YMM16 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM25,%YMM17 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VMOVAPD %YMM31,%YMM4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| CMP %RDI,%R13 | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 1 | 0.20 |
| VMOVAPD %YMM14,%YMM8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| JAE 46ba60 <viscosity_kernel_module_mp_viscosity_kernel_.DIR.OMP.PARALLEL.2+0x840> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
| VMOVUPD -0x8(%R10,%R13,8),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMOVUPD (%R10,%R13,8),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMOVUPD -0x8(%R14,%R13,8),%YMM22 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMOVUPD (%R14,%R13,8),%YMM23 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VADDPD %YMM21,%YMM23,%YMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VADDPD %YMM22,%YMM10,%YMM25 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VMOVUPD -0x8(%RDX,%R13,8),%YMM29 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMOVUPD (%RDX,%R13,8),%YMM30 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VSUBPD %YMM25,%YMM2,%YMM25 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VMOVUPD -0x8(%RSI,%R13,8),%YMM31 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMOVUPD (%RSI,%R13,8),%YMM20 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VADDPD %YMM29,%YMM30,%YMM2 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VADDPD %YMM20,%YMM31,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VSUBPD %YMM1,%YMM2,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| MOV 0x158(%RSP),%RCX | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 1 | 0.33 |
| VMOVUPD -0x8(%RCX,%R13,8),%YMM2 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VMULPD %YMM25,%YMM2,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VFMADD231PD %YMM1,%YMM24,%YMM13 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VADDPD %YMM10,%YMM21,%YMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VSUBPD %YMM10,%YMM23,%YMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VADDPD %YMM22,%YMM10,%YMM10 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VBROADCASTSD 0x8a69b(%RIP),%YMM14 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.33 |
| VMULPD %YMM14,%YMM10,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VADDPD %YMM31,%YMM29,%YMM21 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VSUBPD %YMM21,%YMM30,%YMM21 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VADDPD %YMM20,%YMM21,%YMM20 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VMULPD %YMM14,%YMM20,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VDIVPD %YMM2,%YMM18,%YMM23 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
| VMULPD %YMM23,%YMM20,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMOVUPD (%RAX,%R13,8),%YMM21 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VFMADD231PD %YMM10,%YMM26,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VSUBPD -0x10(%RAX,%R13,8),%YMM21,%YMM10 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
| VADDPD (%RCX,%R13,8),%YMM2,%YMM21 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
| VDIVPD %YMM21,%YMM10,%YMM22 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
| VMOVUPD (%R15,%R13,8),%YMM10 | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 0-1 | 0.33 |
| VSUBPD (%R11,%R13,8),%YMM10,%YMM10 | 1 | 0 | 0.50 | 0.33 | 0.33 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.33 | 3 | 0.50 |
| VMULPD 0x180(%RSP),%YMM10,%YMM21 | 1 | 0.50 | 0.50 | 0.33 | 0.33 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.33 | 4 | 0.50 |
| VMULPD %YMM22,%YMM22,%YMM10 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM21,%YMM21,%YMM29 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM14,%YMM25,%YMM25 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM23,%YMM25,%YMM23 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM10,%YMM23,%YMM23 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM1,%YMM14,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM29,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VMULPD %YMM20,%YMM22,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VFMADD213PD %YMM23,%YMM21,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VFMADD231PD %YMM1,%YMM26,%YMM20 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VADDPD %YMM10,%YMM29,%YMM1 | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0.50 |
| VMAXPD %YMM8,%YMM1,%YMM1 | 1 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 0.50 |
| VDIVPD %YMM1,%YMM20,%YMM10 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13-15 | 8 |
| VCMPPD $0x1,%YMM10,%YMM28,%K1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VCMPPD $0x2,%YMM13,%YMM28,%K0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| VCMPPD $0x6,%YMM13,%YMM28,%K1{%K1} | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| KORW %K1,%K0,%K1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
| KNOTW %K1,%K2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
| KMOVD %K2,%EBX | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 |
| TEST $0xf,%BL | 1 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0.20 | 0 | 0 | 0 | 0.20 | 0 | 2 | 0.20 |
| VMOVAPD %YMM8,%YMM14 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0-1 | 0.17 |
| VXORPD %XMM13,%XMM13,%XMM13 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.17 |
| JE 46b760 <viscosity_kernel_module_mp_viscosity_kernel_.DIR.OMP.PARALLEL.2+0x540> | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0 | 0.50 |
