[X86] AMD Zen 3: same-reg SBB is a dependency-breaking instruction
author Roman Lebedev <lebedev.ri@gmail.com>
Sun, 9 May 2021 20:14:17 +0000 (23:14 +0300)
committer Roman Lebedev <lebedev.ri@gmail.com>
Sun, 9 May 2021 21:03:20 +0000 (00:03 +0300)
As confirmed by llvm-exegesis measurements and the reference docs.
Note that the instruction does still execute; it only breaks the dependency.

While there, bump the latency of MULX32rr from 3 to 4, which seems to match measurements.

llvm/lib/Target/X86/X86ScheduleZnver3.td
llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s
llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s

index f4a0328..38d359e 100644 (file)
@@ -611,7 +611,7 @@ defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Intege
 defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>;    // Integer 32-bit multiplication.
 
 def Zn3MULX32rr : SchedWriteRes<[Zn3Multiplier]> {
-  let Latency = 3;
+  let Latency = 4;
   let ResourceCycles = [1];
   let NumMicroOps = 2;
 }
@@ -1529,4 +1529,10 @@ def : IsZeroIdiomFunction<[
                      SUB64rr, SUB64rr_REV ], ZeroIdiomPredicate>,
 ]>;
 
+def : IsDepBreakingFunction<[
+  // GPR
+  DepBreakingClass<[ SBB32rr, SBB32rr_REV,
+                     SBB64rr, SBB64rr_REV ], ZeroIdiomPredicate>,
+]>;
+
 } // SchedModel
index bcf2435..d74a52d 100644 (file)
@@ -15,12 +15,12 @@ mulxq %rax, %rax, %rax
 
 # CHECK:      Iterations:        1000
 # CHECK-NEXT: Instructions:      2000
-# CHECK-NEXT: Total Cycles:      4003
+# CHECK-NEXT: Total Cycles:      1259
 # CHECK-NEXT: Total uOps:        3000
 
 # CHECK:      Dispatch Width:    6
-# CHECK-NEXT: uOps Per Cycle:    0.75
-# CHECK-NEXT: IPC:               0.50
+# CHECK-NEXT: uOps Per Cycle:    2.38
+# CHECK-NEXT: IPC:               1.59
 # CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Instruction Info:
@@ -33,7 +33,7 @@ mulxq %rax, %rax, %rax
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
 # CHECK-NEXT:  1      1     1.00                        sbbl   %eax, %eax
-# CHECK-NEXT:  2      3     1.00                        mulxl  %eax, %eax, %eax
+# CHECK-NEXT:  2      4     1.00                        mulxl  %eax, %eax, %eax
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0]   - Zn3AGU0
@@ -62,21 +62,20 @@ mulxq %rax, %rax, %rax
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT:  -      -      -     1.33   1.00   1.33   1.34    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -     1.25   1.25   1.25   1.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT:  -      -      -     1.33    -     1.33   1.34    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sbbl    %eax, %eax
+# CHECK-NEXT:  -      -      -     1.25   0.25   1.25   1.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sbbl    %eax, %eax
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mulxl   %eax, %eax, %eax
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     0
-# CHECK-NEXT: Index     0123456789
+# CHECK-NEXT: Index     012345678
 
-# CHECK:      [0,0]     DeER .    .   sbbl     %eax, %eax
-# CHECK-NEXT: [0,1]     D=eeeER   .   mulxl    %eax, %eax, %eax
-# CHECK-NEXT: [1,0]     D====eER  .   sbbl     %eax, %eax
-# CHECK-NEXT: [1,1]     D=====eeeER   mulxl    %eax, %eax, %eax
+# CHECK:      [0,0]     DeER .  .   sbbl       %eax, %eax
+# CHECK-NEXT: [0,1]     D=eeeeER.   mulxl      %eax, %eax, %eax
+# CHECK-NEXT: [1,0]     D=eE---R.   sbbl       %eax, %eax
+# CHECK-NEXT: [1,1]     D==eeeeER   mulxl      %eax, %eax, %eax
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -85,20 +84,20 @@ mulxq %rax, %rax, %rax
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     3.0    0.5    0.0       sbbl        %eax, %eax
-# CHECK-NEXT: 1.     2     4.0    0.0    0.0       mulxl       %eax, %eax, %eax
-# CHECK-NEXT:        2     3.5    0.3    0.0       <total>
+# CHECK-NEXT: 0.     2     1.5    0.5    1.5       sbbl        %eax, %eax
+# CHECK-NEXT: 1.     2     2.5    0.0    0.0       mulxl       %eax, %eax, %eax
+# CHECK-NEXT:        2     2.0    0.3    0.8       <total>
 
 # CHECK:      [1] Code Region
 
 # CHECK:      Iterations:        1000
 # CHECK-NEXT: Instructions:      2000
-# CHECK-NEXT: Total Cycles:      5003
+# CHECK-NEXT: Total Cycles:      1259
 # CHECK-NEXT: Total uOps:        3000
 
 # CHECK:      Dispatch Width:    6
-# CHECK-NEXT: uOps Per Cycle:    0.60
-# CHECK-NEXT: IPC:               0.40
+# CHECK-NEXT: uOps Per Cycle:    2.38
+# CHECK-NEXT: IPC:               1.59
 # CHECK-NEXT: Block RThroughput: 1.0
 
 # CHECK:      Instruction Info:
@@ -140,21 +139,20 @@ mulxq %rax, %rax, %rax
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
-# CHECK-NEXT:  -      -      -     1.33   1.00   1.33   1.34    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
+# CHECK-NEXT:  -      -      -     1.25   1.25   1.25   1.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12.0] [12.1] [13]   [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
-# CHECK-NEXT:  -      -      -     1.33    -     1.33   1.34    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sbbq    %rax, %rax
+# CHECK-NEXT:  -      -      -     1.25   0.25   1.25   1.25    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     sbbq    %rax, %rax
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mulxq   %rax, %rax, %rax
 
 # CHECK:      Timeline view:
-# CHECK-NEXT:                     012
-# CHECK-NEXT: Index     0123456789
+# CHECK-NEXT: Index     012345678
 
-# CHECK:      [0,0]     DeER .    . .   sbbq   %rax, %rax
-# CHECK-NEXT: [0,1]     D=eeeeER  . .   mulxq  %rax, %rax, %rax
-# CHECK-NEXT: [1,0]     D=====eER . .   sbbq   %rax, %rax
-# CHECK-NEXT: [1,1]     D======eeeeER   mulxq  %rax, %rax, %rax
+# CHECK:      [0,0]     DeER .  .   sbbq       %rax, %rax
+# CHECK-NEXT: [0,1]     D=eeeeER.   mulxq      %rax, %rax, %rax
+# CHECK-NEXT: [1,0]     D=eE---R.   sbbq       %rax, %rax
+# CHECK-NEXT: [1,1]     D==eeeeER   mulxq      %rax, %rax, %rax
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -163,6 +161,6 @@ mulxq %rax, %rax, %rax
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     2     3.5    0.5    0.0       sbbq        %rax, %rax
-# CHECK-NEXT: 1.     2     4.5    0.0    0.0       mulxq       %rax, %rax, %rax
-# CHECK-NEXT:        2     4.0    0.3    0.0       <total>
+# CHECK-NEXT: 0.     2     1.5    0.5    1.5       sbbq        %rax, %rax
+# CHECK-NEXT: 1.     2     2.5    0.0    0.0       mulxq       %rax, %rax, %rax
+# CHECK-NEXT:        2     2.0    0.3    0.8       <total>
index 5a92920..1c2ccfc 100644 (file)
@@ -62,8 +62,8 @@ shrx        %rax, (%rbx), %rcx
 # CHECK-NEXT:  2      5     0.50    *                   bzhil  %eax, (%rbx), %ecx
 # CHECK-NEXT:  1      1     0.50                        bzhiq  %rax, %rbx, %rcx
 # CHECK-NEXT:  2      5     0.50    *                   bzhiq  %rax, (%rbx), %rcx
-# CHECK-NEXT:  2      3     1.00                        mulxl  %eax, %ebx, %ecx
-# CHECK-NEXT:  2      7     2.00    *                   mulxl  (%rax), %ebx, %ecx
+# CHECK-NEXT:  2      4     1.00                        mulxl  %eax, %ebx, %ecx
+# CHECK-NEXT:  2      8     2.00    *                   mulxl  (%rax), %ebx, %ecx
 # CHECK-NEXT:  2      4     1.00                        mulxq  %rax, %rbx, %rcx
 # CHECK-NEXT:  2      8     2.00    *                   mulxq  (%rax), %rbx, %rcx
 # CHECK-NEXT:  1      3     1.00                        pdepl  %eax, %ebx, %ecx