From f6b81dae9e9cf5db81eb018069569892e179cdbd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 1 May 2018 14:14:42 +0000 Subject: [PATCH] [X86] Convert all uses of WriteFShuffle to X86SchedWriteWidths. In preparation of splitting WriteFShuffle by vector width. llvm-svn: 331262 --- llvm/lib/Target/X86/X86InstrAVX512.td | 150 +++++++++++++++++++--------------- llvm/lib/Target/X86/X86InstrSSE.td | 126 +++++++++++++++------------- 2 files changed, 153 insertions(+), 123 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 5c871fd..044682c 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -758,14 +758,15 @@ def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>, - EVEX_4V, Sched<[WriteFShuffle]>; + EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, f32mem:$src2, u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR128X:$dst, (X86insertps VR128X:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), imm:$src3))]>, - EVEX_4V, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd, ReadAfterLd]>; + EVEX_4V, EVEX_CD8<32, CD8VT1>, + Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; } //===----------------------------------------------------------------------===// @@ -3766,7 +3767,7 @@ multiclass avx512_move_scalar, EVEX_4V, Sched<[WriteFShuffle]>; + _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>; def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", @@ -3774,7 +3775,7 @@ multiclass avx512_move_scalar, EVEX_4V, EVEX_KZ, Sched<[WriteFShuffle]>; + _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>; let Constraints = "$src0 = $dst" in def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), @@ -3783,7 +3784,7 @@ multiclass avx512_move_scalar, EVEX_4V, EVEX_K, Sched<[WriteFShuffle]>; + _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>; let canFoldAsLoad = 1, isReMaterializable = 1 in def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), @@ -3977,7 +3978,8 @@ let hasSideEffects = 0 in { (ins VR128X:$src1, VR128X:$src2), "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, EVEX_4V, VEX_LIG, - FoldGenData<"VMOVSSZrr">, Sched<[WriteFShuffle]>; + FoldGenData<"VMOVSSZrr">, + Sched<[SchedWriteFShuffle.XMM]>; let Constraints = "$src0 = $dst" in def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), @@ -3986,20 +3988,23 @@ let Constraints = "$src0 = $dst" in "vmovss.s\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", []>, EVEX_K, XS, EVEX_4V, VEX_LIG, - FoldGenData<"VMOVSSZrrk">, Sched<[WriteFShuffle]>; + FoldGenData<"VMOVSSZrrk">, + Sched<[SchedWriteFShuffle.XMM]>; def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, - FoldGenData<"VMOVSSZrrkz">, Sched<[WriteFShuffle]>; + FoldGenData<"VMOVSSZrrkz">, + Sched<[SchedWriteFShuffle.XMM]>; def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XD, EVEX_4V, VEX_LIG, VEX_W, - FoldGenData<"VMOVSDZrr">, Sched<[WriteFShuffle]>; + FoldGenData<"VMOVSDZrr">, + Sched<[SchedWriteFShuffle.XMM]>; let Constraints = "$src0 = $dst" in def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), @@ -4008,7 +4013,8 @@ let Constraints = "$src0 = $dst" in "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", []>, EVEX_K, XD, EVEX_4V, VEX_LIG, - VEX_W, FoldGenData<"VMOVSDZrrk">, Sched<[WriteFShuffle]>; + VEX_W, FoldGenData<"VMOVSDZrrk">, + Sched<[SchedWriteFShuffle.XMM]>; def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f64x_info.KRCWM:$mask, VR128X:$src1, @@ -4016,7 +4022,8 @@ let Constraints = "$src0 = $dst" in "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, - VEX_W, FoldGenData<"VMOVSDZrrkz">, Sched<[WriteFShuffle]>; + VEX_W, FoldGenData<"VMOVSDZrrkz">, + Sched<[SchedWriteFShuffle.XMM]>; } let Predicates = [HasAVX512] in { @@ -5953,6 +5960,7 @@ defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", X86VPermi, WriteFShuffle256, avx512vl_f64_info>, EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; + //===----------------------------------------------------------------------===// // AVX-512 - VPERMIL //===----------------------------------------------------------------------===// @@ -6048,21 +6056,23 @@ defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, WriteVarShuffle>, //===----------------------------------------------------------------------===// // Move Low to High and High to Low packed FP Instructions //===----------------------------------------------------------------------===// + def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>, - Sched<[WriteFShuffle]>, EVEX_4V; + Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V; def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>, - Sched<[WriteFShuffle]>, EVEX_4V; + Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V; //===----------------------------------------------------------------------===// // VMOVHPS/PD VMOVLPS Instructions // All patterns was taken from SSS implementation. //===----------------------------------------------------------------------===// + multiclass avx512_mov_hilo_packed opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in @@ -6074,7 +6084,7 @@ multiclass avx512_mov_hilo_packed opc, string OpcodeStr, SDNode OpNode, (OpNode _.RC:$src1, (_.VT (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, - Sched<[WriteFShuffleLd, ReadAfterLd]>, EVEX_4V; + Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V; } defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps, @@ -9279,29 +9289,32 @@ multiclass avx512_common_fp_sae_packed_imm opc, SDNode OpNode, string OpStr, - X86FoldableSchedWrite sched, AVX512VLVectorVTInfo DestInfo, + X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> { let Predicates = [Pred] in { - defm Z : avx512_3Op_rm_imm8, EVEX_V512, AVX512AIi8Base, EVEX_4V; } let Predicates = [Pred, HasVLX] in { - defm Z128 : avx512_3Op_rm_imm8, EVEX_V128, AVX512AIi8Base, EVEX_4V; - defm Z256 : avx512_3Op_rm_imm8, EVEX_V256, AVX512AIi8Base, EVEX_4V; } } multiclass avx512_common_3Op_imm8 opc, SDNode OpNode, X86FoldableSchedWrite sched, + bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched, Predicate Pred = HasAVX512> { let Predicates = [Pred] in { - defm Z : avx512_3Op_imm8, EVEX_V512; + defm Z : avx512_3Op_imm8, + EVEX_V512; } let Predicates = [Pred, HasVLX] in { - defm Z128 : avx512_3Op_imm8, EVEX_V128; - defm Z256 : avx512_3Op_imm8, EVEX_V256; + defm Z128 : avx512_3Op_imm8, + EVEX_V128; + defm Z256 : avx512_3Op_imm8, + EVEX_V256; } } @@ -9523,20 +9536,20 @@ def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))), 0)>; } -multiclass avx512_valign { defm NAME: avx512_common_3Op_imm8, AVX512AIi8Base, EVEX_4V; } -defm VALIGND: avx512_valign<"valignd", WriteShuffle, avx512vl_i32_info>, +defm VALIGND: avx512_valign<"valignd", SchedWriteShuffle, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VALIGNQ: avx512_valign<"valignq", WriteShuffle, avx512vl_i64_info>, +defm VALIGNQ: avx512_valign<"valignq", SchedWriteShuffle, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; -defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", - WriteShuffle, avx512vl_i8_info, - avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; +defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", + SchedWriteShuffle, avx512vl_i8_info, + avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; // Fragments to help convert valignq into masked valignd. Or valignq/valignd // into vpalignr. @@ -9656,7 +9669,7 @@ let Predicates = [HasVLX, HasBWI] in { } defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", - WriteVecIMul, avx512vl_i16_info, avx512vl_i8_info>, + SchedWriteVecIMul, avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, @@ -9691,37 +9704,38 @@ multiclass avx512_unary_rmb opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_unary_rm_vl opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, - Predicate prd> { + X86SchedWriteWidths sched, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_unary_rm, + defm Z : avx512_unary_rm, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_unary_rm, + defm Z256 : avx512_unary_rm, EVEX_V256; - defm Z128 : avx512_unary_rm, + defm Z128 : avx512_unary_rm, EVEX_V128; } } multiclass avx512_unary_rmb_vl opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, + X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_unary_rmb, + defm Z : avx512_unary_rmb, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_unary_rmb, + defm Z256 : avx512_unary_rmb, EVEX_V256; - defm Z128 : avx512_unary_rmb, + defm Z128 : avx512_unary_rmb, EVEX_V128; } } multiclass avx512_unary_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, - SDNode OpNode, X86FoldableSchedWrite sched, Predicate prd> { + SDNode OpNode, X86SchedWriteWidths sched, + Predicate prd> { defm Q : avx512_unary_rmb_vl, VEX_W; defm D : avx512_unary_rmb_vl opc_d, bits<8> opc_q, string OpcodeStr, } multiclass avx512_unary_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, - SDNode OpNode, X86FoldableSchedWrite sched, Predicate prd> { + SDNode OpNode, X86SchedWriteWidths sched, + Predicate prd> { defm W : avx512_unary_rm_vl, VEX_WIG; defm B : avx512_unary_rm_vl opc_b, bits<8> opc_w, string OpcodeStr, multiclass avx512_unary_rm_vl_all opc_b, bits<8> opc_w, bits<8> opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched> { + X86SchedWriteWidths sched> { defm NAME : avx512_unary_rm_vl_dq, avx512_unary_rm_vl_bw; } -defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, WriteVecALU>; +defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, + SchedWriteVecALU>; // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX. let Predicates = [HasAVX512, NoVLX] in { @@ -9786,11 +9802,11 @@ multiclass avx512_unary_lowering; + SchedWriteVecALU, HasCDI>; // FIXME: Is there a better scheduler class for VPCONFLICT? defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, - WriteVecALU, HasCDI>; + SchedWriteVecALU, HasCDI>; // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX. defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>; @@ -9802,7 +9818,7 @@ defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>; // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ? defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop, - WriteVecALU, HasVPOPCNTDQ>; + SchedWriteVecALU, HasVPOPCNTDQ>; defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>; defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>; @@ -9810,14 +9826,17 @@ defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ> //===---------------------------------------------------------------------===// // Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// + multiclass avx512_replicate opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched> { + X86SchedWriteWidths sched> { defm NAME: avx512_unary_rm_vl, XS; } -defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, WriteFShuffle>; -defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, WriteFShuffle>; +defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, + SchedWriteFShuffle>; +defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, + SchedWriteFShuffle>; //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP @@ -9840,25 +9859,25 @@ multiclass avx512_movddup_128 opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_movddup_common opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { - - defm Z : avx512_unary_rm, EVEX_V512; + X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> { + defm Z : avx512_unary_rm, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_unary_rm, - EVEX_V256; - defm Z128 : avx512_movddup_128, - EVEX_V128; + defm Z256 : avx512_unary_rm, EVEX_V256; + defm Z128 : avx512_movddup_128, EVEX_V128; } } multiclass avx512_movddup opc, string OpcodeStr, SDNode OpNode, - X86FoldableSchedWrite sched> { + X86SchedWriteWidths sched> { defm NAME: avx512_movddup_common, XD, VEX_W; } -defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, WriteFShuffle>; +defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>; let Predicates = [HasVLX] in { def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), @@ -10041,10 +10060,11 @@ defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; //===----------------------------------------------------------------------===// multiclass avx512_shufp{ + AVX512VLVectorVTInfo VTInfo_FP>{ defm NAME: avx512_common_3Op_imm8, EVEX_CD8, - AVX512AIi8Base, EVEX_4V; + SchedWriteFShuffle>, + EVEX_CD8, + AVX512AIi8Base, EVEX_4V; } defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS; @@ -10750,7 +10770,7 @@ multiclass VBMI2_shift_var wOp, bits<8> dqOp, string Prefix, } multiclass VBMI2_shift_imm wOp, bits<8> dqOp, string Prefix, - SDNode OpNode, X86FoldableSchedWrite sched> { + SDNode OpNode, X86SchedWriteWidths sched> { defm W : avx512_common_3Op_rm_imm8, VEX_W, EVEX_CD8<16, CD8VF>; @@ -10763,8 +10783,8 @@ multiclass VBMI2_shift_imm wOp, bits<8> dqOp, string Prefix, // Concat & Shift defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, WriteVecIMul>; defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, WriteVecIMul>; -defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, WriteVecIMul>; -defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, WriteVecIMul>; +defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>; +defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>; // Compress defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256, @@ -10829,9 +10849,9 @@ defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, WriteVecIMul>; //===----------------------------------------------------------------------===// // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW? -defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, WriteVecALU, +defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU, avx512vl_i8_info, HasBITALG>; -defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, WriteVecALU, +defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU, avx512vl_i16_info, HasBITALG>, VEX_W; defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 51e8955..f9774ac 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -179,14 +179,14 @@ multiclass sse12_move_rr, - Sched<[WriteFShuffle]>; + Sched<[SchedWriteFShuffle.XMM]>; // For the disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(base_opc, asm_opr), []>, - Sched<[WriteFShuffle]>, FoldGenData; + Sched<[SchedWriteFShuffle.XMM]>, FoldGenData; } multiclass sse12_moveopc, SDNode psnode, SDNode pdnode, (psnode VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], SSEPackedSingle>, PS, - Sched<[WriteFShuffleLd, ReadAfterLd]>; + Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; def PDrm : PIopc, SDNode psnode, SDNode pdnode, [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], SSEPackedDouble>, PD, - Sched<[WriteFShuffleLd, ReadAfterLd]>; - + Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; } multiclass sse12_mov_hilo_packedopc, SDNode psnode, SDNode pdnode, @@ -860,13 +859,13 @@ let AddedComplexity = 20, Predicates = [UseAVX] in { "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>, - VEX_4V, Sched<[WriteFShuffle]>, VEX_WIG; + VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG; def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>, - VEX_4V, Sched<[WriteFShuffle]>, VEX_WIG; + VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), @@ -874,14 +873,14 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>, - Sched<[WriteFShuffle]>; + Sched<[SchedWriteFShuffle.XMM]>; let isCommutable = 1 in def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>, - Sched<[WriteFShuffle]>; + Sched<[SchedWriteFShuffle.XMM]>; } //===----------------------------------------------------------------------===// @@ -2128,24 +2127,28 @@ multiclass sse12_shuffle, PS, VEX_4V, VEX_WIG; + loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, + PS, VEX_4V, VEX_WIG; defm VSHUFPSY : sse12_shuffle, PS, VEX_4V, VEX_L, VEX_WIG; + loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>, + PS, VEX_4V, VEX_L, VEX_WIG; defm VSHUFPD : sse12_shuffle, PD, VEX_4V, VEX_WIG; + loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, + PD, VEX_4V, VEX_WIG; defm VSHUFPDY : sse12_shuffle, PD, VEX_4V, VEX_L, VEX_WIG; + loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>, + PD, VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { defm SHUFPS : sse12_shuffle, PS; + memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; defm SHUFPD : sse12_shuffle, PD; + memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, PD; } //===----------------------------------------------------------------------===// @@ -2175,44 +2178,44 @@ multiclass sse12_unpack_interleave opc, SDNode OpNode, ValueType vt, let Predicates = [HasAVX, NoVLX] in { defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32, VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - WriteFShuffle, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; + SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64, VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - WriteFShuffle, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; + SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32, VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - WriteFShuffle, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; + SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG; defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64, VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - WriteFShuffle, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; + SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG; defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32, VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - WriteFShuffle, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; + SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64, VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - WriteFShuffle, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; + SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32, VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - WriteFShuffle, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; + SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG; defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64, VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - WriteFShuffle, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; + SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG; }// Predicates = [HasAVX, NoVLX] let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", - WriteFShuffle, SSEPackedSingle>, PS; + SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64, VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", - WriteFShuffle, SSEPackedDouble, 1>, PD; + SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD; defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32, VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", - WriteFShuffle, SSEPackedSingle>, PS; + SchedWriteFShuffle.XMM, SSEPackedSingle>, PS; defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64, VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", - WriteFShuffle, SSEPackedDouble>, PD; + SchedWriteFShuffle.XMM, SSEPackedDouble>, PD; } // Constraints = "$src1 = $dst" let Predicates = [HasAVX1Only] in { @@ -4257,31 +4260,35 @@ let AddedComplexity = 20 in { multiclass sse3_replicate_sfp op, SDNode OpNode, string OpcodeStr, ValueType vt, RegisterClass RC, PatFrag mem_frag, - X86MemOperand x86memop> { + X86MemOperand x86memop, X86FoldableSchedWrite sched> { def rr : S3SI, - Sched<[WriteFShuffle]>; + Sched<[sched]>; def rm : S3SI, - Sched<[WriteFShuffleLd]>; + Sched<[sched.Folded]>; } let Predicates = [HasAVX, NoVLX] in { defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", - v4f32, VR128, loadv4f32, f128mem>, VEX, VEX_WIG; + v4f32, VR128, loadv4f32, f128mem, + SchedWriteFShuffle.XMM>, VEX, VEX_WIG; defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", - v4f32, VR128, loadv4f32, f128mem>, VEX, VEX_WIG; + v4f32, VR128, loadv4f32, f128mem, + SchedWriteFShuffle.XMM>, VEX, VEX_WIG; defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", - v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L, VEX_WIG; + v8f32, VR256, loadv8f32, f256mem, + SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG; defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", - v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L, VEX_WIG; + v8f32, VR256, loadv8f32, f256mem, + SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG; } defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, - memopv4f32, f128mem>; + memopv4f32, f128mem, SchedWriteFShuffle.XMM>; defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, - memopv4f32, f128mem>; + memopv4f32, f128mem, SchedWriteFShuffle.XMM>; let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4i32 (X86Movshdup VR128:$src)), @@ -4317,38 +4324,40 @@ let Predicates = [UseSSE3] in { // SSE3 - Replicate Double FP - MOVDDUP //===---------------------------------------------------------------------===// -multiclass sse3_replicate_dfp { +multiclass sse3_replicate_dfp { def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))]>, - Sched<[WriteFShuffle]>; + Sched<[sched.XMM]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (X86Movddup (scalar_to_vector (loadf64 addr:$src)))))]>, - Sched<[WriteFShuffleLd]>; + Sched<[sched.XMM.Folded]>; } // FIXME: Merge with above classes when there are patterns for the ymm version -multiclass sse3_replicate_dfp_y { +multiclass sse3_replicate_dfp_y { def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>, - Sched<[WriteFShuffle]>; + Sched<[sched.YMM]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>, - Sched<[WriteFShuffleLd]>; + Sched<[sched.YMM.Folded]>; } let Predicates = [HasAVX, NoVLX] in { - defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX, VEX_WIG; - defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L, VEX_WIG; + defm VMOVDDUP : sse3_replicate_dfp<"vmovddup", SchedWriteFShuffle>, + VEX, VEX_WIG; + defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup", SchedWriteFShuffle>, + VEX, VEX_L, VEX_WIG; } -defm MOVDDUP : sse3_replicate_dfp<"movddup">; +defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>; let Predicates = [HasAVX, NoVLX] in { @@ -5309,7 +5318,7 @@ multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1> { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (X86insertps VR128:$src1, VR128:$src2, imm:$src3))]>, - Sched<[WriteFShuffle]>; + Sched<[SchedWriteFShuffle.XMM]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1> { (X86insertps VR128:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), imm:$src3))]>, - Sched<[WriteFShuffleLd, ReadAfterLd]>; + Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; } let ExeDomain = SSEPackedSingle in { @@ -6886,19 +6895,20 @@ class avx2_broadcast_rr opc, string OpcodeStr, RegisterClass RC, let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in { def VBROADCASTSSrm : avx_broadcast_rm<0x18, "vbroadcastss", VR128, - f32mem, v4f32, loadf32, - WriteFShuffleLd>; + f32mem, v4f32, loadf32, + SchedWriteFShuffle.XMM.Folded>; def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256, - f32mem, v8f32, loadf32, - WriteFShuffleLd>, VEX_L; + f32mem, v8f32, loadf32, + SchedWriteFShuffle.XMM.Folded>, VEX_L; } let ExeDomain = SSEPackedDouble, Predicates = [HasAVX, NoVLX] in def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem, - v4f64, loadf64, WriteFShuffleLd>, VEX_L; + v4f64, loadf64, + SchedWriteFShuffle.XMM.Folded>, VEX_L; let ExeDomain = SSEPackedSingle, Predicates = [HasAVX2, NoVLX] in { def VBROADCASTSSrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR128, - v4f32, v4f32, WriteFShuffle>; + v4f32, v4f32, SchedWriteFShuffle.XMM>; def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256, v8f32, v4f32, WriteFShuffle256>, VEX_L; } @@ -6930,7 +6940,7 @@ let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX], def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vbroadcastf128\t{$src, $dst|$dst, $src}", []>, - Sched<[WriteFShuffleLd]>, VEX, VEX_L; + Sched<[SchedWriteFShuffle.XMM.Folded]>, VEX, VEX_L; let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))), @@ -7127,18 +7137,18 @@ multiclass avx_permil opc_rm, bits<8> opc_rmi, string OpcodeStr, let ExeDomain = SSEPackedSingle in { defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, - loadv2i64, v4f32, v4i32, WriteFShuffle, + loadv2i64, v4f32, v4i32, SchedWriteFShuffle.XMM, SchedWriteFVarShuffle.XMM>; defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, - loadv4i64, v8f32, v8i32, WriteFShuffle, + loadv4i64, v8f32, v8i32, SchedWriteFShuffle.YMM, SchedWriteFVarShuffle.YMM>, VEX_L; } let ExeDomain = SSEPackedDouble in { defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, - loadv2i64, v2f64, v2i64, WriteFShuffle, + loadv2i64, v2f64, v2i64, SchedWriteFShuffle.XMM, SchedWriteFVarShuffle.XMM>; defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, - loadv4i64, v4f64, v4i64, WriteFShuffle, + loadv4i64, v4f64, v4i64, SchedWriteFShuffle.YMM, SchedWriteFVarShuffle.YMM>, VEX_L; } -- 2.7.4