From: Simon Pilgrim Date: Sat, 5 Nov 2016 16:40:20 +0000 (+0000) Subject: [X86][SSE] Reuse zeroable element mask instead of regenerating it. NFCI X-Git-Tag: llvmorg-4.0.0-rc1~5429 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1af0fc1103591c35cca8048dae9776a6c329ecd1;p=platform%2Fupstream%2Fllvm.git [X86][SSE] Reuse zeroable element mask instead of regenerating it. NFCI We are repeatedly calling computeZeroableShuffleElements in many shuffle lowering calls for the same shuffle mask/inputs. This is a first step towards reusing the zeroable result, initially just for lowerVectorShuffleAsShift calls. llvm-svn: 286037 --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 98727d0..29aaa0d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7955,10 +7955,9 @@ static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT, /// [ 1, 2, -1, -1, -1, -1, zz, zz] static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, + const SmallBitVector &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); - int Size = Mask.size(); assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); @@ -9131,6 +9130,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// it falls back to the floating point shuffle operation with appropriate bit /// casting. static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -9183,7 +9183,7 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // When loading a scalar and then shuffling it into a vector we can often do @@ -9430,6 +9430,7 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// We try to handle these with integer-domain shuffles where we can, but for /// blends we use the floating point domain blend instructions. static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -9470,7 +9471,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // There are special ways we can lower some single-element blends. @@ -10054,6 +10055,7 @@ static SDValue lowerVectorShuffleAsBlendOfPSHUFBs( /// halves of the inputs separately (making them have relatively few inputs) /// and then concatenate them. static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -10077,7 +10079,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // Use dedicated unpack instructions for masks that match their pattern. @@ -10103,7 +10105,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // See if we can use SSE4A Extraction / Insertion. @@ -10236,6 +10238,7 @@ static int canLowerByDroppingEvenElements(ArrayRef Mask, /// the existing lowering for v8i16 blends on each half, finally PACK-ing them /// back together. static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -10245,7 +10248,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. @@ -10524,21 +10527,22 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// dispatches to the lowering routines accordingly. static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue V1, SDValue V2, + const SmallBitVector &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { switch (VT.SimpleTy) { case MVT::v2i64: - return lowerV2I64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v2f64: return lowerV2F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); case MVT::v4i32: - return lowerV4I32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v4f32: return lowerV4F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); case MVT::v8i16: - return lowerV8I16VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV8I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16i8: - return lowerV16I8VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV16I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); default: llvm_unreachable("Unimplemented!"); @@ -11446,6 +11450,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v4i64 shuffling.. static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -11491,7 +11496,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, @@ -11608,6 +11613,7 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v8i32 shuffling.. static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -11650,7 +11656,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. @@ -11687,6 +11693,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v16i16 shuffling.. static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -11718,7 +11725,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. @@ -11772,6 +11779,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v32i8 shuffling.. static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -11803,7 +11811,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. @@ -11844,6 +11852,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// together based on the available instructions. static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue V1, SDValue V2, + const SmallBitVector &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // If we have a single input to the zero element, insert that into V1 if we @@ -11890,15 +11899,15 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, case MVT::v4f64: return lowerV4F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); case MVT::v4i64: - return lowerV4I64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v8f32: return lowerV8F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); case MVT::v8i32: - return lowerV8I32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16i16: - return lowerV16I16VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV16I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v32i8: - return lowerV32I8VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV32I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); default: llvm_unreachable("Not a valid 256-bit x86 vector type!"); @@ -12039,6 +12048,7 @@ static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef Mask, /// \brief Handle lowering of 8-lane 64-bit integer shuffles. static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12073,7 +12083,7 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, @@ -12089,6 +12099,7 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// \brief Handle lowering of 16-lane 32-bit integer shuffles. static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12122,7 +12133,7 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. @@ -12136,6 +12147,7 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// \brief Handle lowering of 32-lane 16-bit integer shuffles. static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12159,7 +12171,7 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. @@ -12183,6 +12195,7 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// \brief Handle lowering of 64-lane 8-bit integer shuffles. static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -12205,7 +12218,7 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. @@ -12228,6 +12241,7 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// together based on the available instructions. static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, SDValue V1, SDValue V2, + const SmallBitVector &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Subtarget.hasAVX512() && @@ -12258,13 +12272,13 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, case MVT::v16f32: return lowerV16F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); case MVT::v8i64: - return lowerV8I64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV8I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16i32: - return lowerV16I32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV16I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v32i16: - return lowerV32I16VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV32I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v64i8: - return lowerV64I8VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV64I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); default: llvm_unreachable("Not a valid 512-bit x86 vector type!"); @@ -12480,13 +12494,16 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, // For each vector width, delegate to a specialized lowering routine. if (VT.is128BitVector()) - return lower128BitVectorShuffle(DL, Mask, VT, V1, V2, Subtarget, DAG); + return lower128BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, + DAG); if (VT.is256BitVector()) - return lower256BitVectorShuffle(DL, Mask, VT, V1, V2, Subtarget, DAG); + return lower256BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, + DAG); if (VT.is512BitVector()) - return lower512BitVectorShuffle(DL, Mask, VT, V1, V2, Subtarget, DAG); + return lower512BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, + DAG); if (Is1BitVector) return lower1BitVectorShuffle(DL, Mask, VT, V1, V2, Subtarget, DAG);