| Function: hypre_BoomerAMGCorrectCFMarker | Module: exec | Source: par_strength.c:2307-2320 | Coverage: 0.01% |
|---|
| Function: hypre_BoomerAMGCorrectCFMarker | Module: exec | Source: par_strength.c:2307-2320 | Coverage: 0.01% |
|---|
/home/kcamus/qaas_runs/169-443-9681/intel/AMG/build/AMG/AMG/parcsr_ls/par_strength.c: 2307 - 2320 |
-------------------------------------------------------------------------------- |
2307: { |
2308: HYPRE_Int i, cnt; |
2309: |
2310: cnt = 0; |
2311: for (i=0; i < num_var; i++) |
2312: { |
2313: if (CF_marker[i] > 0 ) |
2314: { |
2315: if (CF_marker[i] == 1) CF_marker[i] = new_CF_marker[cnt++]; |
2316: else { CF_marker[i] = 1; cnt++;} |
2317: } |
2318: } |
2319: |
2320: return 0; |
0x495d10 PUSH %RBP |
0x495d11 MOV %RSP,%RBP |
0x495d14 TEST %RSI,%RSI |
0x495d17 JLE 495d2e |
0x495d19 CMP $0x4,%RSI |
0x495d1d JAE 495d80 |
0x495d1f XOR %R8D,%R8D |
0x495d22 MOV %RSI,%RAX |
0x495d25 AND $-0x4,%RAX |
0x495d29 CMP %RSI,%RAX |
0x495d2c JB 495d4f |
0x495d2e XOR %EAX,%EAX |
0x495d30 POP %RBP |
0x495d31 RET |
0x495d32 NOPW %CS:(%RAX,%RAX,1) |
(2288) 0x495d40 INC %R8 |
(2288) 0x495d43 MOV %R9,(%RDI,%RAX,8) |
(2288) 0x495d47 INC %RAX |
(2288) 0x495d4a CMP %RAX,%RSI |
(2288) 0x495d4d JE 495d2e |
(2288) 0x495d4f MOV (%RDI,%RAX,8),%RCX |
(2288) 0x495d53 TEST %RCX,%RCX |
(2288) 0x495d56 JLE 495d47 |
(2288) 0x495d58 MOV $0x1,%R9D |
(2288) 0x495d5e CMP $0x1,%RCX |
(2288) 0x495d62 JNE 495d40 |
(2288) 0x495d64 MOV (%RDX,%R8,8),%R9 |
(2288) 0x495d68 JMP 495d40 |
0x495d6a NOPW %CS:(%RAX,%RAX,1) |
0x495d79 NOPL (%RAX) |
0x495d80 MOV %RSI,%R10 |
0x495d83 SHR $0x2,%R10 |
0x495d87 LEA 0x18(%RDI),%RAX |
0x495d8b XOR %R8D,%R8D |
0x495d8e JMP 495db3 |
0x495d90 NOPW %CS:(%RAX,%RAX,1) |
0x495d9f NOP |
(2289) 0x495da0 INC %R8 |
(2289) 0x495da3 MOV %R9,(%RAX) |
(2289) 0x495da6 ADD $0x20,%RAX |
(2289) 0x495daa DEC %R10 |
(2289) 0x495dad JE 495d22 |
(2289) 0x495db3 MOV -0x18(%RAX),%RCX |
(2289) 0x495db7 TEST %RCX,%RCX |
(2289) 0x495dba JLE 495dd3 |
(2289) 0x495dbc MOV $0x1,%R9D |
(2289) 0x495dc2 CMP $0x1,%RCX |
(2289) 0x495dc6 JNE 495dcc |
(2289) 0x495dc8 MOV (%RDX,%R8,8),%R9 |
(2289) 0x495dcc INC %R8 |
(2289) 0x495dcf MOV %R9,-0x18(%RAX) |
(2289) 0x495dd3 MOV -0x10(%RAX),%RCX |
(2289) 0x495dd7 TEST %RCX,%RCX |
(2289) 0x495dda JLE 495df3 |
(2289) 0x495ddc MOV $0x1,%R9D |
(2289) 0x495de2 CMP $0x1,%RCX |
(2289) 0x495de6 JNE 495dec |
(2289) 0x495de8 MOV (%RDX,%R8,8),%R9 |
(2289) 0x495dec INC %R8 |
(2289) 0x495def MOV %R9,-0x10(%RAX) |
(2289) 0x495df3 MOV -0x8(%RAX),%RCX |
(2289) 0x495df7 TEST %RCX,%RCX |
(2289) 0x495dfa JLE 495e13 |
(2289) 0x495dfc MOV $0x1,%R9D |
(2289) 0x495e02 CMP $0x1,%RCX |
(2289) 0x495e06 JNE 495e0c |
(2289) 0x495e08 MOV (%RDX,%R8,8),%R9 |
(2289) 0x495e0c INC %R8 |
(2289) 0x495e0f MOV %R9,-0x8(%RAX) |
(2289) 0x495e13 MOV (%RAX),%RCX |
(2289) 0x495e16 TEST %RCX,%RCX |
(2289) 0x495e19 JLE 495da6 |
(2289) 0x495e1b MOV $0x1,%R9D |
(2289) 0x495e21 CMP $0x1,%RCX |
(2289) 0x495e25 JNE 495da0 |
(2289) 0x495e2b MOV (%RDX,%R8,8),%R9 |
(2289) 0x495e2f JMP 495da0 |
0x495e34 NOPW %CS:(%RAX,%RAX,1) |
| Coverage (%) | Name | Source Location | Module |
|---|---|---|---|
| ►100.00+ | hypre_BoomerAMGSetup | par_amg_setup.c:730 | exec |
| ○ | hypre_PCGSetup | pcg.c:234 | exec |
| ○ | main | amg.c:398 | exec |
| ○ | __libc_init_first | | libc.so.6 |
| Path / |
| Source file and lines | par_strength.c:2307-2320 |
| Module | exec |
| nb instructions | 25 |
| nb uops | 25 |
| loop length | 114 |
| used x86 registers | 7 |
| used mmx registers | 0 |
| used xmm registers | 0 |
| used ymm registers | 0 |
| used zmm registers | 0 |
| nb stack references | 0 |
| micro-operation queue | 6.25 cycles |
| front end | 6.25 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 2.50 | 1.17 | 0.83 | 1.00 | 2.50 | 3.00 | 1.00 |
| cycles | 3.00 | 2.50 | 1.17 | 0.83 | 1.00 | 2.50 | 3.00 | 1.00 |
| Cycles executing div or sqrt instructions | NA |
| FE+BE cycles | 6.33 |
| Stall cycles | 0.00 |
| Front-end | 6.25 |
| Dispatch | 3.00 |
| Overall L1 | 6.25 |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 10% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 10% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
|---|---|---|---|---|---|---|---|---|---|---|---|
| PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
| MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| TEST %RSI,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
| JLE 495d2e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
| CMP $0x4,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
| JAE 495d80 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
| XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| AND $-0x4,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
| CMP %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
| JB 495d4f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
| RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| SHR $0x2,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
| LEA 0x18(%RDI),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
| XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| JMP 495db3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| Source file and lines | par_strength.c:2307-2320 |
| Module | exec |
| nb instructions | 25 |
| nb uops | 25 |
| loop length | 114 |
| used x86 registers | 7 |
| used mmx registers | 0 |
| used xmm registers | 0 |
| used ymm registers | 0 |
| used zmm registers | 0 |
| nb stack references | 0 |
| micro-operation queue | 6.25 cycles |
| front end | 6.25 cycles |
| | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 |
|---|---|---|---|---|---|---|---|---|
| uops | 3.00 | 2.50 | 1.17 | 0.83 | 1.00 | 2.50 | 3.00 | 1.00 |
| cycles | 3.00 | 2.50 | 1.17 | 0.83 | 1.00 | 2.50 | 3.00 | 1.00 |
| Cycles executing div or sqrt instructions | NA |
| FE+BE cycles | 6.33 |
| Stall cycles | 0.00 |
| Front-end | 6.25 |
| Dispatch | 3.00 |
| Overall L1 | 6.25 |
| all | 0% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 0% |
| all | 10% |
| load | NA (no load vectorizable/vectorized instructions) |
| store | NA (no store vectorizable/vectorized instructions) |
| mul | NA (no mul vectorizable/vectorized instructions) |
| add-sub | NA (no add-sub vectorizable/vectorized instructions) |
| fma | NA (no fma vectorizable/vectorized instructions) |
| div/sqrt | NA (no div/sqrt vectorizable/vectorized instructions) |
| other | 10% |
| Instruction | Nb FU | P0 | P1 | P2 | P3 | P4 | P5 | P6 | P7 | Latency | Recip. throughput |
|---|---|---|---|---|---|---|---|---|---|---|---|
| PUSH %RBP | 1 | 0 | 0 | 0.33 | 0.33 | 1 | 0 | 0 | 0.33 | 3 | 1 |
| MOV %RSP,%RBP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| TEST %RSI,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
| JLE 495d2e | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
| CMP $0x4,%RSI | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
| JAE 495d80 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
| XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| MOV %RSI,%RAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| AND $-0x4,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
| CMP %RSI,%RAX | 1 | 0.25 | 0.25 | 0 | 0 | 0 | 0.25 | 0.25 | 0 | 1 | 0.25 |
| JB 495d4f | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 0 | 0.50-1 |
| XOR %EAX,%EAX | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| POP %RBP | 1 | 0 | 0 | 0.50 | 0.50 | 0 | 0 | 0 | 0 | 2 | 0.50 |
| RET | 1 | 0 | 0 | 0.33 | 0.33 | 0 | 0 | 1 | 0.33 | 0 | 1 |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| NOPL (%RAX) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| MOV %RSI,%R10 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| SHR $0x2,%R10 | 1 | 0.50 | 0 | 0 | 0 | 0 | 0 | 0.50 | 0 | 1 | 0.50 |
| LEA 0x18(%RDI),%RAX | 1 | 0 | 0.50 | 0 | 0 | 0 | 0.50 | 0 | 0 | 1 | 0.50 |
| XOR %R8D,%R8D | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| JMP 495db3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1-2 |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| NOP | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| NOPW %CS:(%RAX,%RAX,1) | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.25 |
| Name | Coverage (%) | Time (s) |
|---|---|---|
| ▼hypre_BoomerAMGCorrectCFMarker– | 0.01 | 0 |
| ○Loop 2289 - par_strength.c:2311-2315 - exec | 0.01 | 0 |
| ○Loop 2288 - par_strength.c:2311-2315 - exec | 0 | 0 |
