From 6f64daca8f3cbcf850ce7d502291017187e9fc08 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 15 Apr 2020 12:41:54 +0200 Subject: [PATCH] Upgrade calls to CreateShuffleVector to use the preferred form of passing an array of ints No functionality change intended. --- clang/lib/CodeGen/CGBuiltin.cpp | 113 +++++++++------------ clang/lib/CodeGen/CGExpr.cpp | 46 ++++----- clang/lib/CodeGen/CGExprScalar.cpp | 20 ++-- llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 2 +- llvm/lib/Target/X86/X86PartialReduction.cpp | 14 +-- .../Transforms/InstCombine/InstCombineCalls.cpp | 6 +- .../InstCombine/InstCombineSimplifyDemanded.cpp | 2 +- .../InstCombine/InstCombineVectorOps.cpp | 7 +- .../Transforms/Scalar/LowerMatrixIntrinsics.cpp | 12 +-- llvm/lib/Transforms/Scalar/SROA.cpp | 8 +- llvm/lib/Transforms/Utils/LoopUtils.cpp | 10 +- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 20 ++-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 12 +-- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 9 +- 14 files changed, 117 insertions(+), 164 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 6e3a3df..87f5210 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5711,7 +5711,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vext_v: case NEON::BI__builtin_neon_vextq_v: { int CV = cast(Ops[2])->getSExtValue(); - SmallVector Indices; + SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) Indices.push_back(i+CV); @@ -5983,7 +5983,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector Indices; + SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { Indices.push_back(i+vi); Indices.push_back(i+e+vi); @@ -6011,7 +6011,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector Indices; + SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) Indices.push_back(2*i+vi); @@ -6029,7 +6029,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector Indices; + SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { Indices.push_back((i + vi*e) >> 1); Indices.push_back(((i + vi*e) >> 1)+e); @@ -6120,7 +6120,7 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef Ops, TblOps.push_back(ExtOp); // Build a vector containing sequential number like (0, 1, 2, ..., 15) - SmallVector Indices; + SmallVector Indices; llvm::VectorType *TblTy = cast(Ops[0]->getType()); for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { Indices.push_back(2*i); @@ -6957,7 +6957,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, if (VTy->getElementType()->isIntegerTy(64)) { // Extract the other lane. Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - uint32_t Lane = cast(Ops[2])->getZExtValue(); + int Lane = cast(Ops[2])->getZExtValue(); Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); // Load the value as a one-element vector. @@ -6967,9 +6967,8 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Value *Align = getAlignmentValue32(PtrOp0); Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); // Combine them. 
- uint32_t Indices[] = {1 - Lane, Lane}; - SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); - return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); + int Indices[] = {1 - Lane, Lane}; + return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane"); } LLVM_FALLTHROUGH; case NEON::BI__builtin_neon_vld1_lane_v: { @@ -7144,7 +7143,7 @@ static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder, static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) { // Make a shufflevector that extracts every other element of a vector (evens // or odds, as desired). - SmallVector Indices; + SmallVector Indices; unsigned InputElements = cast(V->getType())->getNumElements(); for (unsigned i = 0; i < InputElements; i += 2) @@ -7157,7 +7156,7 @@ static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1) { // Make a shufflevector that interleaves two vectors element by element. assert(V0->getType() == V1->getType() && "Can't zip different vector types"); - SmallVector Indices; + SmallVector Indices; unsigned InputElements = cast(V0->getType())->getNumElements(); for (unsigned i = 0; i < InputElements; i++) { @@ -7185,7 +7184,7 @@ static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder, unsigned ReverseWidth) { // MVE-specific helper function which reverses the elements of a // vector within every (ReverseWidth)-bit collection of lanes. - SmallVector Indices; + SmallVector Indices; unsigned LaneSize = V->getType()->getScalarSizeInBits(); unsigned Elements = 128 / LaneSize; unsigned Mask = ReverseWidth / LaneSize - 1; @@ -9971,7 +9970,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector Indices; + SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { Indices.push_back(i+vi); Indices.push_back(i+e+vi); @@ -9990,7 +9989,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector Indices; + SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) Indices.push_back(2*i+vi); @@ -10008,7 +10007,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector Indices; + SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { Indices.push_back((i + vi*e) >> 1); Indices.push_back(((i + vi*e) >> 1)+e); @@ -10132,7 +10131,7 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, // If we have less than 8 elements, then the starting mask was an i8 and // we need to extract down to the right number of elements. if (NumElts < 8) { - uint32_t Indices[4]; + int Indices[4]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, @@ -10321,7 +10320,7 @@ static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, } if (NumElts < 8) { - uint32_t Indices[8]; + int Indices[8]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; for (unsigned i = NumElts; i != 8; ++i) @@ -10661,9 +10660,8 @@ static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, // Extract the subvector. 
if (NumDstElts != cast(Src->getType())->getNumElements()) { assert(NumDstElts == 4 && "Unexpected vector size"); - uint32_t ShuffleMask[4] = {0, 1, 2, 3}; Src = CGF.Builder.CreateShuffleVector(Src, UndefValue::get(Src->getType()), - ShuffleMask); + ArrayRef{0, 1, 2, 3}); } // Bitcast from vXi16 to vXf16. @@ -11545,7 +11543,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Index &= SubVectors - 1; // Remove any extra bits. Index *= NumElts; - uint32_t Indices[16]; + int Indices[16]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i + Index; @@ -11585,7 +11583,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Index &= SubVectors - 1; // Remove any extra bits. Index *= SrcNumElts; - uint32_t Indices[16]; + int Indices[16]; for (unsigned i = 0; i != DstNumElts; ++i) Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i; @@ -11646,7 +11644,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, cast(Ops[0]->getType())->getNumElements(); unsigned Imm = cast(Ops[2])->getZExtValue(); - uint32_t Indices[16]; + int Indices[16]; // If there are more than 8 elements, the immediate is used twice so make // sure we handle that. for (unsigned i = 0; i != NumElts; ++i) @@ -11666,7 +11664,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Splat the 8-bits of immediate 4 times to help the loop wrap around. Imm = (Imm & 0xff) * 0x01010101; - uint32_t Indices[32]; + int Indices[32]; for (unsigned l = 0; l != NumElts; l += 8) { for (unsigned i = 0; i != 4; ++i) { Indices[l + i] = l + (Imm & 3); @@ -11690,7 +11688,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Splat the 8-bits of immediate 4 times to help the loop wrap around. Imm = (Imm & 0xff) * 0x01010101; - uint32_t Indices[32]; + int Indices[32]; for (unsigned l = 0; l != NumElts; l += 8) { for (unsigned i = 0; i != 4; ++i) Indices[l + i] = l + i; @@ -11722,7 +11720,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Splat the 8-bits of immediate 4 times to help the loop wrap around. Imm = (Imm & 0xff) * 0x01010101; - uint32_t Indices[16]; + int Indices[16]; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { Indices[i + l] = (Imm % NumLaneElts) + l; @@ -11773,7 +11771,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, unsigned NumElts = Ty->getNumElements(); // These intrinsics operate on 256-bit lanes of four 64-bit elements. - uint32_t Indices[8]; + int Indices[8]; for (unsigned l = 0; l != NumElts; l += 4) for (unsigned i = 0; i != 4; ++i) Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3); @@ -11804,7 +11802,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); } - uint32_t Indices[64]; + int Indices[64]; // 256-bit palignr operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l != NumElts; l += 16) { for (unsigned i = 0; i != 16; ++i) { @@ -11832,7 +11830,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Mask the shift amount to width of two vectors. ShiftVal &= (2 * NumElts) - 1; - uint32_t Indices[16]; + int Indices[16]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i + ShiftVal; @@ -11854,7 +11852,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 
4 : 2; unsigned NumLaneElts = NumElts / NumLanes; - uint32_t Indices[16]; + int Indices[16]; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { unsigned Index = (Imm % NumLanes) * NumLaneElts; Imm /= NumLanes; // Discard the bits we just used. @@ -11884,7 +11882,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // duplicate sources, but this can be dealt with in the backend. Value *OutOps[2]; - uint32_t Indices[8]; + int Indices[8]; for (unsigned l = 0; l != 2; ++l) { // Determine the source for this lane. if (Imm & (1 << ((l * 4) + 3))) @@ -11922,7 +11920,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, if (ShiftVal >= 16) return llvm::Constant::getNullValue(ResultType); - uint32_t Indices[64]; + int Indices[64]; // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l != NumElts; l += 16) { for (unsigned i = 0; i != 16; ++i) { @@ -11952,7 +11950,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, if (ShiftVal >= 16) return llvm::Constant::getNullValue(ResultType); - uint32_t Indices[64]; + int Indices[64]; // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l != NumElts; l += 16) { for (unsigned i = 0; i != 16; ++i) { @@ -11982,7 +11980,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *In = getMaskVecValue(*this, Ops[0], NumElts); - uint32_t Indices[64]; + int Indices[64]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = NumElts + i - ShiftVal; @@ -12004,7 +12002,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Value *In = getMaskVecValue(*this, Ops[0], NumElts); - uint32_t Indices[64]; + int Indices[64]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i + ShiftVal; @@ -12284,7 +12282,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); - uint32_t Indices[64]; + int Indices[64]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; @@ -13443,15 +13441,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); if (getTarget().isLittleEndian()) { - // Create a shuffle mask of (1, 0) - Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), - ConstantInt::get(Int32Ty, 0) - }; - Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); - // Reverse the double words in the vector we will extract from. Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); - Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask); + Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef{1, 0}); // Reverse the index. Index = MaxIndex - Index; @@ -13485,13 +13477,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // Emit the call, then reverse the double words of the results vector. 
Value *Call = Builder.CreateCall(F, Ops); - // Create a shuffle mask of (1, 0) - Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1), - ConstantInt::get(Int32Ty, 0) - }; - Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); - - Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask); + Value *ShuffleCall = + Builder.CreateShuffleVector(Call, Call, ArrayRef{1, 0}); return ShuffleCall; } else { Ops[1] = ConstantInt::getSigned(Int32Ty, Index); @@ -13510,15 +13497,12 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, // Account for endianness by treating this as just a shuffle. So we use the // same indices for both LE and BE in order to produce expected results in // both cases. - unsigned ElemIdx0 = (Index & 2) >> 1; - unsigned ElemIdx1 = 2 + (Index & 1); - - Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), - ConstantInt::get(Int32Ty, ElemIdx1)}; - Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + int ElemIdx0 = (Index & 2) >> 1; + int ElemIdx1 = 2 + (Index & 1); + int ShuffleElts[2] = {ElemIdx0, ElemIdx1}; Value *ShuffleCall = - Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts); QualType BIRetType = E->getType(); auto RetTy = ConvertType(BIRetType); return Builder.CreateBitCast(ShuffleCall, RetTy); @@ -13532,10 +13516,10 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); // Create a shuffle mask - unsigned ElemIdx0; - unsigned ElemIdx1; - unsigned ElemIdx2; - unsigned ElemIdx3; + int ElemIdx0; + int ElemIdx1; + int ElemIdx2; + int ElemIdx3; if (getTarget().isLittleEndian()) { // Little endian element N comes from element 8+N-Index of the // concatenated wide vector (of course, using modulo arithmetic on @@ -13552,14 +13536,9 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, ElemIdx3 = Index + 3; } - Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), - ConstantInt::get(Int32Ty, ElemIdx1), - ConstantInt::get(Int32Ty, ElemIdx2), - ConstantInt::get(Int32Ty, ElemIdx3)}; - - Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3}; Value *ShuffleCall = - Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleElts); QualType BIRetType = E->getType(); auto RetTy = ConvertType(BIRetType); return Builder.CreateBitCast(ShuffleCall, RetTy); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index cd95e78..bafe68d2 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1745,12 +1745,9 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, // Handle vec3 special. if (VecTy && VecTy->getNumElements() == 3) { // Our source is a vec3, do a shuffle vector to make it a vec4. 
- llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1), - Builder.getInt32(2), - llvm::UndefValue::get(Builder.getInt32Ty())}; - llvm::Value *MaskV = llvm::ConstantVector::get(Mask); Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy), - MaskV, "extractVec"); + ArrayRef{0, 1, 2, -1}, + "extractVec"); SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4); } if (Addr.getElementType() != SrcTy) { @@ -1886,13 +1883,12 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) { // Always use shuffle vector to try to retain the original program structure unsigned NumResultElts = ExprVT->getNumElements(); - SmallVector Mask; + SmallVector Mask; for (unsigned i = 0; i != NumResultElts; ++i) - Mask.push_back(Builder.getInt32(getAccessedFieldNo(i, Elts))); + Mask.push_back(getAccessedFieldNo(i, Elts)); - llvm::Value *MaskV = llvm::ConstantVector::get(Mask); Vec = Builder.CreateShuffleVector(Vec, llvm::UndefValue::get(Vec->getType()), - MaskV); + Mask); return RValue::get(Vec); } @@ -2133,32 +2129,27 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, // Use shuffle vector is the src and destination are the same number of // elements and restore the vector mask since it is on the side it will be // stored. - SmallVector Mask(NumDstElts); + SmallVector Mask(NumDstElts); for (unsigned i = 0; i != NumSrcElts; ++i) - Mask[getAccessedFieldNo(i, Elts)] = Builder.getInt32(i); + Mask[getAccessedFieldNo(i, Elts)] = i; - llvm::Value *MaskV = llvm::ConstantVector::get(Mask); - Vec = Builder.CreateShuffleVector(SrcVal, - llvm::UndefValue::get(Vec->getType()), - MaskV); + Vec = Builder.CreateShuffleVector( + SrcVal, llvm::UndefValue::get(Vec->getType()), Mask); } else if (NumDstElts > NumSrcElts) { // Extended the source vector to the same length and then shuffle it // into the destination. // FIXME: since we're shuffling with undef, can we just use the indices // into that? This could be simpler. - SmallVector ExtMask; + SmallVector ExtMask; for (unsigned i = 0; i != NumSrcElts; ++i) - ExtMask.push_back(Builder.getInt32(i)); - ExtMask.resize(NumDstElts, llvm::UndefValue::get(Int32Ty)); - llvm::Value *ExtMaskV = llvm::ConstantVector::get(ExtMask); - llvm::Value *ExtSrcVal = - Builder.CreateShuffleVector(SrcVal, - llvm::UndefValue::get(SrcVal->getType()), - ExtMaskV); + ExtMask.push_back(i); + ExtMask.resize(NumDstElts, -1); + llvm::Value *ExtSrcVal = Builder.CreateShuffleVector( + SrcVal, llvm::UndefValue::get(SrcVal->getType()), ExtMask); // build identity - SmallVector Mask; + SmallVector Mask; for (unsigned i = 0; i != NumDstElts; ++i) - Mask.push_back(Builder.getInt32(i)); + Mask.push_back(i); // When the vector size is odd and .odd or .hi is used, the last element // of the Elts constant array will be one past the size of the vector. 
@@ -2168,9 +2159,8 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, // modify when what gets shuffled in for (unsigned i = 0; i != NumSrcElts; ++i) - Mask[getAccessedFieldNo(i, Elts)] = Builder.getInt32(i+NumDstElts); - llvm::Value *MaskV = llvm::ConstantVector::get(Mask); - Vec = Builder.CreateShuffleVector(Vec, ExtSrcVal, MaskV); + Mask[getAccessedFieldNo(i, Elts)] = i + NumDstElts; + Vec = Builder.CreateShuffleVector(Vec, ExtSrcVal, Mask); } else { // We should never shorten the vector llvm_unreachable("unexpected shorten vector length"); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index f3f826e..c4f1faa 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -1650,18 +1650,17 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { Value* V1 = CGF.EmitScalarExpr(E->getExpr(0)); Value* V2 = CGF.EmitScalarExpr(E->getExpr(1)); - SmallVector indices; + SmallVector Indices; for (unsigned i = 2; i < E->getNumSubExprs(); ++i) { llvm::APSInt Idx = E->getShuffleMaskIdx(CGF.getContext(), i-2); // Check for -1 and output it as undef in the IR. if (Idx.isSigned() && Idx.isAllOnesValue()) - indices.push_back(llvm::UndefValue::get(CGF.Int32Ty)); + Indices.push_back(-1); else - indices.push_back(Builder.getInt32(Idx.getZExtValue())); + Indices.push_back(Idx.getZExtValue()); } - Value *SV = llvm::ConstantVector::get(indices); - return Builder.CreateShuffleVector(V1, V2, SV, "shuffle"); + return Builder.CreateShuffleVector(V1, V2, Indices, "shuffle"); } Value *ScalarExprEmitter::VisitConvertVectorExpr(ConvertVectorExpr *E) { @@ -4532,14 +4531,9 @@ Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) { static Value *ConvertVec3AndVec4(CGBuilderTy &Builder, CodeGenFunction &CGF, Value *Src, unsigned NumElementsDst) { llvm::Value *UnV = llvm::UndefValue::get(Src->getType()); - SmallVector Args; - Args.push_back(Builder.getInt32(0)); - Args.push_back(Builder.getInt32(1)); - Args.push_back(Builder.getInt32(2)); - if (NumElementsDst == 4) - Args.push_back(llvm::UndefValue::get(CGF.Int32Ty)); - llvm::Constant *Mask = llvm::ConstantVector::get(Args); - return Builder.CreateShuffleVector(Src, UnV, Mask); + static constexpr int Mask[] = {0, 1, 2, -1}; + return Builder.CreateShuffleVector(Src, UnV, + llvm::makeArrayRef(Mask, NumElementsDst)); } // Create cast instructions for converting LLVM value \p Src to LLVM type \p diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 53de86c..64a8ff3 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -1230,7 +1230,7 @@ bool InterleavedLoadCombineImpl::combine(std::list &InterleavedLoad, // Create the final SVIs and replace all uses. int i = 0; for (auto &VI : InterleavedLoad) { - SmallVector Mask; + SmallVector Mask; for (unsigned j = 0; j < ElementsPerSVI; j++) Mask.push_back(i + j * Factor); diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp index 26c05ea..503ea77 100644 --- a/llvm/lib/Target/X86/X86PartialReduction.cpp +++ b/llvm/lib/Target/X86/X86PartialReduction.cpp @@ -242,8 +242,8 @@ bool X86PartialReduction::tryMAddReplacement(Value *Op, BinaryOperator *Add) { // Extract even elements and odd elements and add them together. This will // be pattern matched by SelectionDAG to pmaddwd. This instruction will be // half the original width. 
- SmallVector EvenMask(NumElts / 2); - SmallVector OddMask(NumElts / 2); + SmallVector EvenMask(NumElts / 2); + SmallVector OddMask(NumElts / 2); for (int i = 0, e = NumElts / 2; i != e; ++i) { EvenMask[i] = i * 2; OddMask[i] = i * 2 + 1; @@ -253,7 +253,7 @@ bool X86PartialReduction::tryMAddReplacement(Value *Op, BinaryOperator *Add) { Value *MAdd = Builder.CreateAdd(EvenElts, OddElts); // Concatenate zeroes to extend back to the original type. - SmallVector ConcatMask(NumElts); + SmallVector ConcatMask(NumElts); std::iota(ConcatMask.begin(), ConcatMask.end(), 0); Value *Zero = Constant::getNullValue(MAdd->getType()); Value *Concat = Builder.CreateShuffleVector(MAdd, Zero, ConcatMask); @@ -339,7 +339,7 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) { if (NumElts < 16) { // Pad input with zeroes. - SmallVector ConcatMask(16); + SmallVector ConcatMask(16); for (unsigned i = 0; i != NumElts; ++i) ConcatMask[i] = i; for (unsigned i = NumElts; i != 16; ++i) @@ -360,7 +360,7 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) { // First collect the pieces we need. SmallVector Ops(NumSplits); for (unsigned i = 0; i != NumSplits; ++i) { - SmallVector ExtractMask(IntrinsicNumElts); + SmallVector ExtractMask(IntrinsicNumElts); std::iota(ExtractMask.begin(), ExtractMask.end(), i * IntrinsicNumElts); Value *ExtractOp0 = Builder.CreateShuffleVector(Op0, Op0, ExtractMask); Value *ExtractOp1 = Builder.CreateShuffleVector(Op1, Op0, ExtractMask); @@ -373,7 +373,7 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) { for (unsigned s = Stages; s > 0; --s) { unsigned NumConcatElts = Ops[0]->getType()->getVectorNumElements() * 2; for (unsigned i = 0; i != 1U << (s - 1); ++i) { - SmallVector ConcatMask(NumConcatElts); + SmallVector ConcatMask(NumConcatElts); std::iota(ConcatMask.begin(), ConcatMask.end(), 0); Ops[i] = Builder.CreateShuffleVector(Ops[i*2], Ops[i*2+1], ConcatMask); } @@ -386,7 +386,7 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) { // Extract down to 2 elements. Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef{0, 1}); } else if (NumElts >= 8) { - SmallVector ConcatMask(NumElts); + SmallVector ConcatMask(NumElts); unsigned SubElts = Ops[0]->getType()->getVectorNumElements(); for (unsigned i = 0; i != SubElts; ++i) ConcatMask[i] = i; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 377d0be..28236ad 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -416,7 +416,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II, Amt, DemandedUpper, II.getModule()->getDataLayout()); if (KnownLowerBits.getMaxValue().ult(BitWidth) && (DemandedUpper.isNullValue() || KnownUpperBits.isZero())) { - SmallVector ZeroSplat(VWidth, 0); + SmallVector ZeroSplat(VWidth, 0); Amt = Builder.CreateShuffleVector(Amt, Amt, ZeroSplat); return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt) : Builder.CreateLShr(Vec, Amt)) @@ -663,7 +663,7 @@ static Value *simplifyX86pack(IntrinsicInst &II, Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1); // Shuffle clamped args together at the lane level. 
- SmallVector PackMask; + SmallVector PackMask; for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt) PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane)); @@ -760,7 +760,7 @@ static Value *simplifyX86insertps(const IntrinsicInst &II, return ZeroVector; // Initialize by passing all of the first source bits through. - uint32_t ShuffleMask[4] = { 0, 1, 2, 3 }; + int ShuffleMask[4] = {0, 1, 2, 3}; // We may replace the second operand with the zero vector. Value *V1 = II.getArgOperand(1); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index a4565a3..543f768 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1158,7 +1158,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II, DemandedElts.countTrailingZeros()); } - SmallVector EltMask; + SmallVector EltMask; unsigned NewLoadIdx = 0; for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) { if (!!DemandedElts[OrigLoadIdx]) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index b6f01d3..8ed7ec2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2099,12 +2099,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (!BegIsAligned) { // Shuffle the input so [0,NumElements) contains the output, and // [NumElems,SrcNumElems) is undef. - SmallVector ShuffleMask(SrcNumElems, - UndefValue::get(Int32Ty)); + SmallVector ShuffleMask(SrcNumElems, -1); for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) - ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx); + ShuffleMask[I] = Idx; V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), - ConstantVector::get(ShuffleMask), + ShuffleMask, SVI.getName() + ".extract"); BegIdx = 0; } diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index c1861a6..a2ddf85 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -916,21 +916,19 @@ public: // If Col is 7 long and I is 2 and BlockNumElts is 2 the mask is: 0, 1, 7, // 8, 4, 5, 6 - SmallVector Mask; + SmallVector Mask; unsigned i; for (i = 0; i < I; i++) - Mask.push_back(Builder.getInt32(i)); + Mask.push_back(i); unsigned VecNumElts = cast(Col->getType())->getNumElements(); for (; i < I + BlockNumElts; i++) - Mask.push_back(Builder.getInt32(i - I + VecNumElts)); + Mask.push_back(i - I + VecNumElts); for (; i < VecNumElts; i++) - Mask.push_back(Builder.getInt32(i)); + Mask.push_back(i); - Value *MaskVal = ConstantVector::get(Mask); - - return Builder.CreateShuffleVector(Col, Block, MaskVal); + return Builder.CreateShuffleVector(Col, Block, Mask); } Value *createMulAdd(Value *Sum, Value *A, Value *B, bool UseFPOp, diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 808b5a4..d25ad4b 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2207,12 +2207,12 @@ static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, return V; } - SmallVector Mask; + SmallVector Mask; Mask.reserve(NumElements); for (unsigned i = BeginIndex; i != EndIndex; 
++i) - Mask.push_back(IRB.getInt32(i)); - V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), - ConstantVector::get(Mask), Name + ".extract"); + Mask.push_back(i); + V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), Mask, + Name + ".extract"); LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n"); return V; } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 0c94597..add9133 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -917,19 +917,17 @@ llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, assert(isPowerOf2_32(VF) && "Reduction emission only supported for pow2 vectors!"); Value *TmpVec = Src; - SmallVector ShuffleMask(VF, nullptr); + SmallVector ShuffleMask(VF); for (unsigned i = VF; i != 1; i >>= 1) { // Move the upper half of the vector to the lower half. for (unsigned j = 0; j != i / 2; ++j) - ShuffleMask[j] = Builder.getInt32(i / 2 + j); + ShuffleMask[j] = i / 2 + j; // Fill the rest of the mask with undef. - std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), - UndefValue::get(Builder.getInt32Ty())); + std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1); Value *Shuf = Builder.CreateShuffleVector( - TmpVec, UndefValue::get(TmpVec->getType()), - ConstantVector::get(ShuffleMask), "rdx.shuf"); + TmpVec, UndefValue::get(TmpVec->getType()), ShuffleMask, "rdx.shuf"); if (Op != Instruction::ICmp && Op != Instruction::FCmp) { // The builder propagates its fast-math-flags setting. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0e50795..0107f5a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2128,13 +2128,12 @@ void InnerLoopVectorizer::packScalarIntoVectorValue( Value *InnerLoopVectorizer::reverseVector(Value *Vec) { assert(Vec->getType()->isVectorTy() && "Invalid type"); - SmallVector ShuffleMask; + SmallVector ShuffleMask; for (unsigned i = 0; i < VF; ++i) - ShuffleMask.push_back(Builder.getInt32(VF - i - 1)); + ShuffleMask.push_back(VF - i - 1); return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()), - ConstantVector::get(ShuffleMask), - "reverse"); + ShuffleMask, "reverse"); } // Return whether we allow using masked interleave-groups (for dealing with @@ -3628,10 +3627,10 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // We will construct a vector for the recurrence by combining the values for // the current and previous iterations. This is the required shuffle mask. - SmallVector ShuffleMask(VF); - ShuffleMask[0] = Builder.getInt32(VF - 1); + SmallVector ShuffleMask(VF); + ShuffleMask[0] = VF - 1; for (unsigned I = 1; I < VF; ++I) - ShuffleMask[I] = Builder.getInt32(I + VF - 1); + ShuffleMask[I] = I + VF - 1; // The vector from which to take the initial value for the current iteration // (actual or unrolled). Initially, this is the vector phi node. @@ -3641,10 +3640,9 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { for (unsigned Part = 0; Part < UF; ++Part) { Value *PreviousPart = getOrCreateVectorValue(Previous, Part); Value *PhiPart = VectorLoopValueMap.getVectorValue(Phi, Part); - auto *Shuffle = - VF > 1 ? Builder.CreateShuffleVector(Incoming, PreviousPart, - ConstantVector::get(ShuffleMask)) - : Incoming; + auto *Shuffle = VF > 1 ? 
Builder.CreateShuffleVector(Incoming, PreviousPart, + ShuffleMask) + : Incoming; PhiPart->replaceAllUsesWith(Shuffle); cast(PhiPart)->eraseFromParent(); VectorLoopValueMap.resetVectorValue(Phi, Part, Shuffle); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 07d9f40..794ca100 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1437,7 +1437,7 @@ private: return VL.size() == ReuseShuffleIndices.size() && std::equal( VL.begin(), VL.end(), ReuseShuffleIndices.begin(), - [this](Value *V, unsigned Idx) { return V == Scalars[Idx]; }); + [this](Value *V, int Idx) { return V == Scalars[Idx]; }); } /// A vector of scalars. @@ -1451,7 +1451,7 @@ private: EntryState State; /// Does this sequence require some shuffling? - SmallVector ReuseShuffleIndices; + SmallVector ReuseShuffleIndices; /// Does this entry require reordering? ArrayRef ReorderIndices; @@ -4027,9 +4027,9 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL) { V = SV->getOperand(0); } else { // Reshuffle to get only unique values. - SmallVector UniqueIdxs; - SmallSet UsedIdxs; - for(unsigned Idx : E->ReuseShuffleIndices) + SmallVector UniqueIdxs; + SmallSet UsedIdxs; + for (int Idx : E->ReuseShuffleIndices) if (UsedIdxs.insert(Idx).second) UniqueIdxs.emplace_back(Idx); V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), @@ -4046,7 +4046,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef VL) { ScalarTy = SI->getValueOperand()->getType(); // Check that every instruction appears once in this bundle. - SmallVector ReuseShuffleIndicies; + SmallVector ReuseShuffleIndicies; SmallVector UniqueValues; if (VL.size() > 2) { DenseMap UniquePositions; diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 9e7cbe8..e1d3a05 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -237,16 +237,13 @@ static bool foldExtractExtract(Instruction &I, const TargetTransformInfo &TTI) { uint64_t SplatIndex = ConvertToShuffle == Ext0 ? C0 : C1; uint64_t CheapExtIndex = ConvertToShuffle == Ext0 ? C1 : C0; auto *VecTy = cast(V0->getType()); - Type *I32Ty = IntegerType::getInt32Ty(I.getContext()); - UndefValue *Undef = UndefValue::get(I32Ty); - SmallVector ShufMask(VecTy->getNumElements(), Undef); - ShufMask[CheapExtIndex] = ConstantInt::get(I32Ty, SplatIndex); + SmallVector ShufMask(VecTy->getNumElements(), -1); + ShufMask[CheapExtIndex] = SplatIndex; IRBuilder<> Builder(ConvertToShuffle); // extelt X, C --> extelt (splat X), C' Value *Shuf = Builder.CreateShuffleVector(ConvertToShuffle->getOperand(0), - UndefValue::get(VecTy), - ConstantVector::get(ShufMask)); + UndefValue::get(VecTy), ShufMask); Value *NewExt = Builder.CreateExtractElement(Shuf, CheapExtIndex); if (ConvertToShuffle == Ext0) Ext0 = cast(NewExt); -- 2.7.4
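
Editor's note (not part of the patch): the change above is purely mechanical, swapping the mask argument of IRBuilder::CreateShuffleVector from a constant vector Value* to a plain array of ints. The sketch below contrasts the two call forms under the assumption that the ArrayRef<int> overload of CreateShuffleVector is available (it is what this commit migrates to); the helper names emitReverseOld/emitReverseNew and the surrounding setup are hypothetical and exist only for illustration.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    // Old style: materialize a constant vector of i32 indices and pass it as a
    // Value* mask operand.
    static Value *emitReverseOld(IRBuilder<> &Builder, Value *Vec,
                                 unsigned NumElts) {
      SmallVector<Constant *, 16> Mask;
      for (unsigned i = 0; i != NumElts; ++i)
        Mask.push_back(Builder.getInt32(NumElts - 1 - i));
      Value *MaskV = ConstantVector::get(Mask);
      return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()),
                                         MaskV, "reverse");
    }

    // Preferred style: pass the indices directly as an ArrayRef<int>; -1 marks
    // an undef lane, so the caller never has to build Constant nodes.
    static Value *emitReverseNew(IRBuilder<> &Builder, Value *Vec,
                                 unsigned NumElts) {
      SmallVector<int, 16> Mask;
      for (unsigned i = 0; i != NumElts; ++i)
        Mask.push_back(NumElts - 1 - i);
      return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()),
                                         Mask, "reverse");
    }

The int-based form is what the patch adopts everywhere: the mask is plain data rather than IR, undef lanes are spelled -1 instead of UndefValue, and the builder takes care of constructing whatever constant the instruction needs.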