[X86][SLM] Fix HADD/HSUB uops, latency and throughput
author: Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 7 Sep 2021 21:30:21 +0000 (22:30 +0100)
committer: Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 11 Sep 2021 10:44:09 +0000 (11:44 +0100)
Noticed while trying to improve the generic reduction costs using the D103695 helper script. The new values were confirmed against the Intel AoM, Agner's instruction tables, and InstLatX64.

llvm/lib/Target/X86/X86ScheduleSLM.td
llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s

diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index e5bc42a..bd7b56a 100644
@@ -420,12 +420,12 @@ def  : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
 // Horizontal add/sub  instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-defm : SLMWriteResPair<WriteFHAdd,   [SLM_FPC_RSV01], 6, [6], 4>;
-defm : SLMWriteResPair<WriteFHAddY,  [SLM_FPC_RSV01], 6, [6], 4>;
+defm : SLMWriteResPair<WriteFHAdd,   [SLM_FPC_RSV1],  6, [6], 4, 1>;
+defm : X86WriteResPairUnsupported<WriteFHAddY>;
 defm : X86WriteResPairUnsupported<WriteFHAddZ>;
-defm : SLMWriteResPair<WritePHAdd,   [SLM_FPC_RSV01], 1>;
-defm : SLMWriteResPair<WritePHAddX,  [SLM_FPC_RSV01], 1>;
-defm : SLMWriteResPair<WritePHAddY,  [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WritePHAdd,   [SLM_FPC_RSV01], 6, [6], 3, 1>;
+defm : SLMWriteResPair<WritePHAddX,  [SLM_FPC_RSV01], 6, [6], 3, 1>;
+defm : X86WriteResPairUnsupported<WritePHAddY>;
 defm : X86WriteResPairUnsupported<WritePHAddZ>;
 
 // String instructions.
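diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
index 2394973..bb34d31 100644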
@@ -47,14 +47,14 @@ mwait
 # CHECK-NEXT:  1      7     2.00    *                   addsubpd       (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        addsubps       %xmm0, %xmm2
 # CHECK-NEXT:  1      6     1.00    *                   addsubps       (%rax), %xmm2
-# CHECK-NEXT:  4      6     3.00                        haddpd %xmm0, %xmm2
-# CHECK-NEXT:  4      9     3.00    *                   haddpd (%rax), %xmm2
-# CHECK-NEXT:  4      6     3.00                        haddps %xmm0, %xmm2
-# CHECK-NEXT:  4      9     3.00    *                   haddps (%rax), %xmm2
-# CHECK-NEXT:  4      6     3.00                        hsubpd %xmm0, %xmm2
-# CHECK-NEXT:  4      9     3.00    *                   hsubpd (%rax), %xmm2
-# CHECK-NEXT:  4      6     3.00                        hsubps %xmm0, %xmm2
-# CHECK-NEXT:  4      9     3.00    *                   hsubps (%rax), %xmm2
+# CHECK-NEXT:  4      6     6.00                        haddpd %xmm0, %xmm2
+# CHECK-NEXT:  5      9     6.00    *                   haddpd (%rax), %xmm2
+# CHECK-NEXT:  4      6     6.00                        haddps %xmm0, %xmm2
+# CHECK-NEXT:  5      9     6.00    *                   haddps (%rax), %xmm2
+# CHECK-NEXT:  4      6     6.00                        hsubpd %xmm0, %xmm2
+# CHECK-NEXT:  5      9     6.00    *                   hsubpd (%rax), %xmm2
+# CHECK-NEXT:  4      6     6.00                        hsubps %xmm0, %xmm2
+# CHECK-NEXT:  5      9     6.00    *                   hsubps (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00    *                   lddqu  (%rax), %xmm2
 # CHECK-NEXT:  1      100   1.00                  U     monitor
 # CHECK-NEXT:  1      1     1.00                        movddup        %xmm0, %xmm2
@@ -77,7 +77,7 @@ mwait
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -      -      -     32.00  30.00   -      -     10.00
+# CHECK-NEXT:  -      -      -     8.00   54.00   -      -     10.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
@@ -85,14 +85,14 @@ mwait
 # CHECK-NEXT:  -      -      -      -     2.00    -      -     1.00   addsubpd (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -      -     addsubps %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     1.00    -      -     1.00   addsubps (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     haddpd   %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   haddpd   (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     haddps   %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   haddps   (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     hsubpd   %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   hsubpd   (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     hsubps   %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   hsubps   (%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -      -     haddpd   %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -     1.00   haddpd   (%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -      -     haddps   %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -     1.00   haddps   (%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -      -     hsubpd   %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -     1.00   hsubpd   (%rax), %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -      -     hsubps   %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -      -     6.00    -      -     1.00   hsubps   (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00   lddqu    (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     monitor
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     movddup  %xmm0, %xmm2
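diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
index 3fb4878..e74a73f 100644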
@@ -122,30 +122,30 @@ psignw      (%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   palignr        $1, (%rax), %mm2
 # CHECK-NEXT:  1      1     1.00                        palignr        $1, %xmm0, %xmm2
 # CHECK-NEXT:  1      4     1.00    *                   palignr        $1, (%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        phaddd %mm0, %mm2
-# CHECK-NEXT:  1      4     1.00    *                   phaddd (%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        phaddd %xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   phaddd (%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        phaddsw        %mm0, %mm2
-# CHECK-NEXT:  1      4     1.00    *                   phaddsw        (%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        phaddsw        %xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   phaddsw        (%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        phaddw %mm0, %mm2
-# CHECK-NEXT:  1      4     1.00    *                   phaddw (%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        phaddw %xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   phaddw (%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        phsubd %mm0, %mm2
-# CHECK-NEXT:  1      4     1.00    *                   phsubd (%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        phsubd %xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   phsubd (%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        phsubsw        %mm0, %mm2
-# CHECK-NEXT:  1      4     1.00    *                   phsubsw        (%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        phsubsw        %xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   phsubsw        (%rax), %xmm2
-# CHECK-NEXT:  1      1     0.50                        phsubw %mm0, %mm2
-# CHECK-NEXT:  1      4     1.00    *                   phsubw (%rax), %mm2
-# CHECK-NEXT:  1      1     0.50                        phsubw %xmm0, %xmm2
-# CHECK-NEXT:  1      4     1.00    *                   phsubw (%rax), %xmm2
+# CHECK-NEXT:  3      6     3.00                        phaddd %mm0, %mm2
+# CHECK-NEXT:  4      9     3.00    *                   phaddd (%rax), %mm2
+# CHECK-NEXT:  3      6     3.00                        phaddd %xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   phaddd (%rax), %xmm2
+# CHECK-NEXT:  3      6     3.00                        phaddsw        %mm0, %mm2
+# CHECK-NEXT:  4      9     3.00    *                   phaddsw        (%rax), %mm2
+# CHECK-NEXT:  3      6     3.00                        phaddsw        %xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   phaddsw        (%rax), %xmm2
+# CHECK-NEXT:  3      6     3.00                        phaddw %mm0, %mm2
+# CHECK-NEXT:  4      9     3.00    *                   phaddw (%rax), %mm2
+# CHECK-NEXT:  3      6     3.00                        phaddw %xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   phaddw (%rax), %xmm2
+# CHECK-NEXT:  3      6     3.00                        phsubd %mm0, %mm2
+# CHECK-NEXT:  4      9     3.00    *                   phsubd (%rax), %mm2
+# CHECK-NEXT:  3      6     3.00                        phsubd %xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   phsubd (%rax), %xmm2
+# CHECK-NEXT:  3      6     3.00                        phsubsw        %mm0, %mm2
+# CHECK-NEXT:  4      9     3.00    *                   phsubsw        (%rax), %mm2
+# CHECK-NEXT:  3      6     3.00                        phsubsw        %xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   phsubsw        (%rax), %xmm2
+# CHECK-NEXT:  3      6     3.00                        phsubw %mm0, %mm2
+# CHECK-NEXT:  4      9     3.00    *                   phsubw (%rax), %mm2
+# CHECK-NEXT:  3      6     3.00                        phsubw %xmm0, %xmm2
+# CHECK-NEXT:  4      9     3.00    *                   phsubw (%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        pmaddubsw      %mm0, %mm2
 # CHECK-NEXT:  1      7     1.00    *                   pmaddubsw      (%rax), %mm2
 # CHECK-NEXT:  1      5     2.00                        pmaddubsw      %xmm0, %xmm2
@@ -183,7 +183,7 @@ psignw      (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]
-# CHECK-NEXT:  -      -      -     52.00  24.00   -      -     32.00
+# CHECK-NEXT:  -      -      -     112.00 84.00   -      -     32.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    Instructions:
@@ -203,30 +203,30 @@ psignw      (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   palignr  $1, (%rax), %mm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     palignr  $1, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   palignr  $1, (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phaddd   %mm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phaddd   (%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phaddd   %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phaddd   (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phaddsw  %mm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phaddsw  (%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phaddsw  %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phaddsw  (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phaddw   %mm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phaddw   (%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phaddw   %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phaddw   (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phsubd   %mm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phsubd   (%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phsubd   %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phsubd   (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phsubsw  %mm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phsubsw  (%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phsubsw  %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phsubsw  (%rax), %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phsubw   %mm0, %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phsubw   (%rax), %mm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -      -     phsubw   %xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     0.50   0.50    -      -     1.00   phsubw   (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phaddd   %mm0, %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phaddd   (%rax), %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phaddd   %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phaddd   (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phaddsw  %mm0, %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phaddsw  (%rax), %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phaddsw  %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phaddsw  (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phaddw   %mm0, %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phaddw   (%rax), %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phaddw   %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phaddw   (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phsubd   %mm0, %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phsubd   (%rax), %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phsubd   %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phsubd   (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phsubsw  %mm0, %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phsubsw  (%rax), %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phsubsw  %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phsubsw  (%rax), %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phsubw   %mm0, %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phsubw   (%rax), %mm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -      -     phsubw   %xmm0, %xmm2
+# CHECK-NEXT:  -      -      -     3.00   3.00    -      -     1.00   phsubw   (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     pmaddubsw        %mm0, %mm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00   pmaddubsw        (%rax), %mm2
 # CHECK-NEXT:  -      -      -     2.00    -      -      -      -     pmaddubsw        %xmm0, %xmm2