[X86] Correct the scheduling information for AVX-VNNI and AVX512-VNNI instructions.
author Craig Topper <craig.topper@sifive.com>
Sun, 9 Apr 2023 23:33:47 +0000 (16:33 -0700)
committer Craig Topper <craig.topper@sifive.com>
Sun, 9 Apr 2023 23:33:48 +0000 (16:33 -0700)
The AVXVNNI load instructions weren't using the Folded load write
class and they had no ReadAdvance.
The YMM versions were using the XMM schedule class.

The AVX512VNNI instructions had the right classes, but not enough
ReadAdvances to account for the 2 sources.

Noticed while investigating #62026.

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D147872

llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td

index d60247f..49fe951 100644 (file)
@@ -12577,7 +12577,8 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                                    (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                             (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                    EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
-                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
+                                   Sched<[sched.Folded, sched.ReadAfterFold,
+                                          sched.ReadAfterFold]>;
   defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                    (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                    OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
@@ -12585,7 +12586,8 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                                    (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                     (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
                                    EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
-                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
+                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold,
+                                                sched.ReadAfterFold]>;
   }
 }
 
index 9345bda..92b4d5d 100644 (file)
@@ -7332,7 +7332,9 @@ multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
              !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
              [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
                                       (loadv4i32 addr:$src3))))]>,
-             VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+             VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded,
+                            SchedWriteVecIMul.XMM.ReadAfterFold,
+                            SchedWriteVecIMul.XMM.ReadAfterFold]>;
 
   let isCommutable = IsCommutable in
   def Yrr  : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
@@ -7340,14 +7342,16 @@ multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
              !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
              [(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
                                        VR256:$src2, VR256:$src3)))]>,
-             VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
+             VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
 
   def Yrm  : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
              (ins VR256:$src1, VR256:$src2, i256mem:$src3),
              !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
              [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
                                       (loadv8i32 addr:$src3))))]>,
-             VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
+             VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
+                                   SchedWriteVecIMul.YMM.ReadAfterFold,
+                                   SchedWriteVecIMul.YMM.ReadAfterFold]>;
 }
 
 defm VPDPBUSD   : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>;