[X86] Folded MOVDDUPrm has the same sched behaviour as MOVSHDUPrm/MOVSLDUPrm on Haswell/IceLake
author: Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 7 Nov 2022 15:17:23 +0000 (15:17 +0000)
committer: Simon Pilgrim <llvm-dev@redking.me.uk>
Mon, 7 Nov 2022 15:17:32 +0000 (15:17 +0000)
There can be a scheduling difference for the register form (MOVDDUPrr), but not for the load-folded broadcast form, which executes purely on Port23.

Fixes an old TODO (inherited from the SkylakeServer model, where it was fixed in c7662dc3e52801ec824d8473278fb976107d3e57).

Confirmed against Agner Fog's instruction tables and uops.info.

llvm/lib/Target/X86/X86SchedHaswell.td
llvm/lib/Target/X86/X86SchedIceLake.td
llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s

index 44fc1ac..bd4cbe2 100644 (file)
@@ -864,6 +864,7 @@ def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> {
 def: InstRW<[HWWriteResGroup0], (instrs VBROADCASTSSrm)>;
 def: InstRW<[HWWriteResGroup0], (instregex "(V?)MOVSHDUPrm",
                                            "(V?)MOVSLDUPrm",
+                                           "(V?)MOVDDUPrm",
                                            "VPBROADCAST(D|Q)rm")>;
 
 def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> {
@@ -881,13 +882,6 @@ def: InstRW<[HWWriteResGroup0_1], (instrs VBROADCASTF128,
 def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F(32|64|80)m",
                                              "VPBROADCAST(D|Q)Yrm")>;
 
-def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> {
-  let Latency = 5;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1];
-}
-def: InstRW<[HWWriteResGroup0_2], (instregex "(V?)MOVDDUPrm")>;
-
 def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
   let Latency = 1;
   let NumMicroOps = 2;
index 43fb6ee..331fafa 100644 (file)
@@ -1066,13 +1066,6 @@ def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort0156]> {
 }
 def: InstRW<[ICXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
 
-def ICXWriteResGroup58 : SchedWriteRes<[ICXPort23]> {
-  let Latency = 5;
-  let NumMicroOps = 1;
-  let ResourceCycles = [1];
-}
-def: InstRW<[ICXWriteResGroup58], (instregex "(V?)MOVDDUPrm")>;  // TODO: Should this be ICXWriteResGroup71?
-
 def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort015]> {
   let Latency = 5;
   let NumMicroOps = 2;
@@ -1174,8 +1167,10 @@ def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm,
                                           VPBROADCASTQrm,
                                           VMOVSHDUPrm,
                                           VMOVSLDUPrm,
+                                          VMOVDDUPrm,
                                           MOVSHDUPrm,
-                                          MOVSLDUPrm)>;
+                                          MOVSLDUPrm,
+                                          MOVDDUPrm)>;
 
 def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> {
   let Latency = 6;
index ec5c773..ea7d251 100644 (file)
@@ -1269,7 +1269,7 @@ vzeroupper
 # CHECK-NEXT:  1      1     1.00                        vmovd  %xmm0, %ecx
 # CHECK-NEXT:  2      1     1.00           *            vmovd  %xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        vmovddup       %xmm0, %xmm2
-# CHECK-NEXT:  1      5     0.50    *                   vmovddup       (%rax), %xmm2
+# CHECK-NEXT:  1      6     0.50    *                   vmovddup       (%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        vmovddup       %ymm0, %ymm2
 # CHECK-NEXT:  1      7     0.50    *                   vmovddup       (%rax), %ymm2
 # CHECK-NEXT:  1      1     0.33                        vmovdqa        %xmm0, %xmm2
index 6832def..7085718 100644 (file)
@@ -58,7 +58,7 @@ mwait
 # CHECK-NEXT:  1      6     0.50    *                   lddqu  (%rax), %xmm2
 # CHECK-NEXT:  1      100   0.25                  U     monitor
 # CHECK-NEXT:  1      1     1.00                        movddup        %xmm0, %xmm2
-# CHECK-NEXT:  1      5     0.50    *                   movddup        (%rax), %xmm2
+# CHECK-NEXT:  1      6     0.50    *                   movddup        (%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        movshdup       %xmm0, %xmm2
 # CHECK-NEXT:  1      6     0.50    *                   movshdup       (%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        movsldup       %xmm0, %xmm2
index fa0720f..383ddac 100644 (file)
@@ -1269,7 +1269,7 @@ vzeroupper
 # CHECK-NEXT:  1      2     1.00                        vmovd  %xmm0, %ecx
 # CHECK-NEXT:  2      1     1.00           *            vmovd  %xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        vmovddup       %xmm0, %xmm2
-# CHECK-NEXT:  1      5     0.50    *                   vmovddup       (%rax), %xmm2
+# CHECK-NEXT:  1      6     0.50    *                   vmovddup       (%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        vmovddup       %ymm0, %ymm2
 # CHECK-NEXT:  1      7     0.50    *                   vmovddup       (%rax), %ymm2
 # CHECK-NEXT:  1      1     0.33                        vmovdqa        %xmm0, %xmm2
index e09b9e0..4d19424 100644 (file)
@@ -58,7 +58,7 @@ mwait
 # CHECK-NEXT:  1      6     0.50    *                   lddqu  (%rax), %xmm2
 # CHECK-NEXT:  1      100   0.25                  U     monitor
 # CHECK-NEXT:  1      1     1.00                        movddup        %xmm0, %xmm2
-# CHECK-NEXT:  1      5     0.50    *                   movddup        (%rax), %xmm2
+# CHECK-NEXT:  1      6     0.50    *                   movddup        (%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        movshdup       %xmm0, %xmm2
 # CHECK-NEXT:  1      6     0.50    *                   movshdup       (%rax), %xmm2
 # CHECK-NEXT:  1      1     0.50                        movsldup       %xmm0, %xmm2