From 0c9f6ad6f450ef77904f5a739762f95bf8308b6b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 9 Apr 2023 16:33:47 -0700 Subject: [PATCH] [X86] Correct the scheduling information for AVX-VNNI and AVX512-VNNI instructions. The AVXVNNI load instructions weren't using the Folded load write class and they had no ReadAdvance. The YMM versions were using the XMM schedule class. The AVX512VNNI instructions had the right classes, but not enough ReadAdvances to account for the 2 sources. Noticed while investigating #62026. Reviewed By: pengfei Differential Revision: https://reviews.llvm.org/D147872 --- llvm/lib/Target/X86/X86InstrAVX512.td | 6 ++++-- llvm/lib/Target/X86/X86InstrSSE.td | 10 +++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index d60247f..49fe951 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -12577,7 +12577,8 @@ multiclass VNNI_rmb Op, string OpStr, SDNode OpNode, (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, (VTI.VT (VTI.LdFrag addr:$src3))))>, EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD, - Sched<[sched.Folded, sched.ReadAfterFold]>; + Sched<[sched.Folded, sched.ReadAfterFold, + sched.ReadAfterFold]>; defm mb : AVX512_maskable_3src Op, string OpStr, SDNode OpNode, (OpNode VTI.RC:$src1, VTI.RC:$src2, (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, - T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>; + T8PD, Sched<[sched.Folded, sched.ReadAfterFold, + sched.ReadAfterFold]>; } } diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 9345bda..92b4d5d 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7332,7 +7332,9 @@ multiclass avx_vnni_rm opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2, 
(loadv4i32 addr:$src3))))]>, - VEX_4V, Sched<[SchedWriteVecIMul.XMM]>; + VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded, + SchedWriteVecIMul.XMM.ReadAfterFold, + SchedWriteVecIMul.XMM.ReadAfterFold]>; let isCommutable = IsCommutable in def Yrr : AVX8I opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>, - VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>; + VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>; def Yrm : AVX8I, - VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>; + VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded, + SchedWriteVecIMul.YMM.ReadAfterFold, + SchedWriteVecIMul.YMM.ReadAfterFold]>; } defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>; -- 2.7.4