From 6ca7398a1e9481c950d60f2200b297fd9c2d67a7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 28 Feb 2019 07:21:26 +0000 Subject: [PATCH] [X86] Use PreprocessISelDAG to convert vector sra/srl/shl to the X86 specific variable shift ISD opcodes. This allows us to use the same set of isel patterns for sra/srl/shl which are undefined for out of range shifts and intrinsic shifts which aren't undefined. Doing this late allows DAG combine to have every opportunity to optimize the sra/srl/shl nodes. This removes about 7000 bytes from the isel table and simplifies the td files. llvm-svn: 355071 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 24 +++++++ llvm/lib/Target/X86/X86InstrAVX512.td | 116 +++----------------------------- llvm/lib/Target/X86/X86InstrSSE.td | 21 ++---- 3 files changed, 40 insertions(+), 121 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index e29c8dc..e9a599f 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -753,6 +753,30 @@ void X86DAGToDAGISel::PreprocessISelDAG() { continue; } + // Replace vector shifts with their X86 specific equivalent so we don't + // need 2 sets of patterns. + switch (N->getOpcode()) { + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + if (N->getValueType(0).isVector()) { + unsigned NewOpc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected opcode!"); + case ISD::SHL: NewOpc = X86ISD::VSHLV; break; + case ISD::SRA: NewOpc = X86ISD::VSRAV; break; + case ISD::SRL: NewOpc = X86ISD::VSRLV; break; + } + SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), + N->getOperand(0), N->getOperand(1)); + --I; + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); + ++I; + CurDAG->DeleteNode(N); + continue; + } + } + if (OptLevel != CodeGenOpt::None && // Only do this when the target can fold the load into the call or // jmp. 
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index c82d170..6e25f77 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -6427,118 +6427,22 @@ multiclass avx512_var_shift_w opc, string OpcodeStr, } } -defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>, - avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>; +defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>, + avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>; -defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>, - avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>; +defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>, + avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>; -defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>, - avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>; +defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>, + avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>; defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>; defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>; -defm : avx512_var_shift_lowering; -defm : avx512_var_shift_lowering; -defm : avx512_var_shift_lowering; -defm : avx512_var_shift_lowering; - -// Special handing for handling VPSRAV intrinsics. 
-multiclass avx512_var_shift_int_lowering p> { - let Predicates = p in { - def : Pat<(_.VT (OpNode _.RC:$src1, _.RC:$src2)), - (!cast(InstrStr#_.ZSuffix#rr) _.RC:$src1, - _.RC:$src2)>; - def : Pat<(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))), - (!cast(InstrStr#_.ZSuffix##rm) - _.RC:$src1, addr:$src2)>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src1, _.RC:$src2), _.RC:$src0)), - (!cast(InstrStr#_.ZSuffix#rrk) _.RC:$src0, - _.KRC:$mask, _.RC:$src1, _.RC:$src2)>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), - _.RC:$src0)), - (!cast(InstrStr#_.ZSuffix##rmk) _.RC:$src0, - _.KRC:$mask, _.RC:$src1, addr:$src2)>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)), - (!cast(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask, - _.RC:$src1, _.RC:$src2)>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), - _.ImmAllZerosV)), - (!cast(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask, - _.RC:$src1, addr:$src2)>; - } -} - -multiclass avx512_var_shift_int_lowering_mb p> : - avx512_var_shift_int_lowering { - let Predicates = p in { - def : Pat<(_.VT (OpNode _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), - (!cast(InstrStr#_.ZSuffix##rmb) - _.RC:$src1, addr:$src2)>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src2))), - _.RC:$src0)), - (!cast(InstrStr#_.ZSuffix##rmbk) _.RC:$src0, - _.KRC:$mask, _.RC:$src1, addr:$src2)>; - def : Pat<(_.VT (vselect _.KRCWM:$mask, - (OpNode _.RC:$src1, - (X86VBroadcast (_.ScalarLdFrag addr:$src2))), - _.ImmAllZerosV)), - (!cast(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask, - _.RC:$src1, addr:$src2)>; - } -} - -multiclass avx512_var_shift_int_lowering_vl { - defm : avx512_var_shift_int_lowering; - defm : avx512_var_shift_int_lowering; - defm : avx512_var_shift_int_lowering; -} - -multiclass avx512_var_shift_int_lowering_mb_vl { - defm : avx512_var_shift_int_lowering_mb; - 
defm : avx512_var_shift_int_lowering_mb; - defm : avx512_var_shift_int_lowering_mb; -} - -defm : avx512_var_shift_int_lowering_vl<"VPSRAVW", X86vsrav, avx512vl_i16_info, - HasBWI>; -defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVD", X86vsrav, - avx512vl_i32_info, HasAVX512>; -defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVQ", X86vsrav, - avx512vl_i64_info, HasAVX512>; - -defm : avx512_var_shift_int_lowering_vl<"VPSRLVW", X86vsrlv, avx512vl_i16_info, - HasBWI>; -defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVD", X86vsrlv, - avx512vl_i32_info, HasAVX512>; -defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVQ", X86vsrlv, - avx512vl_i64_info, HasAVX512>; - -defm : avx512_var_shift_int_lowering_vl<"VPSLLVW", X86vshlv, avx512vl_i16_info, - HasBWI>; -defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVD", X86vshlv, - avx512vl_i32_info, HasAVX512>; -defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVQ", X86vshlv, - avx512vl_i64_info, HasAVX512>; +defm : avx512_var_shift_lowering; +defm : avx512_var_shift_lowering; +defm : avx512_var_shift_lowering; +defm : avx512_var_shift_lowering; // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 7f08518..cac557ed 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8371,7 +8371,7 @@ def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))), // Variable Bit Shifts // multiclass avx2_var_shift opc, string OpcodeStr, SDNode OpNode, - SDNode IntrinNode, ValueType vt128, ValueType vt256> { + ValueType vt128, ValueType vt256> { def rr : AVX28I opc, string OpcodeStr, SDNode OpNode, (vt256 (load addr:$src2)))))]>, VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded, SchedWriteVarVecShift.YMM.ReadAfterFold]>; - - def : Pat<(vt128 (IntrinNode VR128:$src1, VR128:$src2)), - (!cast(NAME#"rr") VR128:$src1, VR128:$src2)>; - def : Pat<(vt128 (IntrinNode VR128:$src1, (load addr:$src2))), - (!cast(NAME#"rm") VR128:$src1, addr:$src2)>; - def : Pat<(vt256 (IntrinNode VR256:$src1, VR256:$src2)), - (!cast(NAME#"Yrr") VR256:$src1, VR256:$src2)>; - def : Pat<(vt256 (IntrinNode VR256:$src1, (load addr:$src2))), - (!cast(NAME#"Yrm") VR256:$src1, addr:$src2)>; } let Predicates = [HasAVX2, NoVLX] in { - defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, X86vshlv, v4i32, v8i32>; - defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, X86vshlv, v2i64, v4i64>, VEX_W; - defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, X86vsrlv, v4i32, v8i32>; - defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, X86vsrlv, v2i64, v4i64>, VEX_W; - defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, X86vsrav, v4i32, v8i32>; + defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>; + defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W; + defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>; + defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W; + defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>; } 
//===----------------------------------------------------------------------===// -- 2.7.4