From de65b356dc2d9730b02ac21dd308cdc3ced0feac Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 22 Jun 2020 08:57:37 -0400 Subject: [PATCH] [VectorCombine] add/use pass-level IRBuilder This saves creating/destroying a builder every time we perform some transform. The tests show instruction ordering diffs resulting from always inserting at the root instruction now, but those should be benign. --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 29 ++++++++++++---------- .../PhaseOrdering/X86/vector-reductions.ll | 6 ++--- .../Transforms/VectorCombine/X86/extract-binop.ll | 4 +-- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index cb70278..3935400 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -51,12 +51,13 @@ class VectorCombine { public: VectorCombine(Function &F, const TargetTransformInfo &TTI, const DominatorTree &DT) - : F(F), TTI(TTI), DT(DT) {} + : F(F), Builder(F.getContext()), TTI(TTI), DT(DT) {} bool run(); private: Function &F; + IRBuilder<> Builder; const TargetTransformInfo &TTI; const DominatorTree &DT; @@ -64,6 +65,12 @@ private: unsigned Opcode, ExtractElementInst *&ConvertToShuffle, unsigned PreferredExtractIndex); + ExtractElementInst *translateExtract(ExtractElementInst *ExtElt, + unsigned NewIndex); + void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1, + Instruction &I); + void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1, + Instruction &I); bool foldExtractExtract(Instruction &I); bool foldBitcastShuf(Instruction &I); bool scalarizeBinopOrCmp(Instruction &I); @@ -182,12 +189,13 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0, /// the source vector (shift the scalar element) to a NewIndex for extraction. /// Return null if the input can be constant folded, so that we are not creating /// unnecessary instructions. -static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt, - unsigned NewIndex) { +ExtractElementInst *VectorCombine::translateExtract(ExtractElementInst *ExtElt, + unsigned NewIndex) { // If the extract can be constant-folded, this code is unsimplified. Defer // to other passes to handle that. Value *X = ExtElt->getVectorOperand(); Value *C = ExtElt->getIndexOperand(); + assert(isa(C) && "Expected a constant index operand"); if (isa(X)) return nullptr; @@ -196,11 +204,9 @@ static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt, // ShufMask = { 2, undef, undef, undef } auto *VecTy = cast(X->getType()); SmallVector Mask(VecTy->getNumElements(), -1); - assert(isa(C) && "Expected a constant index operand"); Mask[NewIndex] = cast(C)->getZExtValue(); // extelt X, C --> extelt (shuffle X), NewIndex - IRBuilder<> Builder(ExtElt); Value *Shuf = Builder.CreateShuffleVector(X, UndefValue::get(VecTy), Mask, "shift"); return cast(Builder.CreateExtractElement(Shuf, NewIndex)); @@ -209,8 +215,8 @@ static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt, /// Try to reduce extract element costs by converting scalar compares to vector /// compares followed by extract. /// cmp (ext0 V0, C), (ext1 V1, C) -static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1, - Instruction &I) { +void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0, + ExtractElementInst *Ext1, Instruction &I) { assert(isa(&I) && "Expected a compare"); assert(cast(Ext0->getIndexOperand())->getZExtValue() == cast(Ext1->getIndexOperand())->getZExtValue() && @@ -218,7 +224,6 @@ static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1, // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C ++NumVecCmp; - IRBuilder<> Builder(&I); CmpInst::Predicate Pred = cast(&I)->getPredicate(); Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand(); Value *VecCmp = Builder.CreateCmp(Pred, V0, V1); @@ -230,8 +235,8 @@ static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1, /// Try to reduce extract element costs by converting scalar binops to vector /// binops followed by extract. /// bo (ext0 V0, C), (ext1 V1, C) -static void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1, - Instruction &I) { +void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0, + ExtractElementInst *Ext1, Instruction &I) { assert(isa(&I) && "Expected a binary operator"); assert(cast(Ext0->getIndexOperand())->getZExtValue() == cast(Ext1->getIndexOperand())->getZExtValue() && @@ -239,7 +244,6 @@ static void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1, // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C ++NumVecBO; - IRBuilder<> Builder(&I); Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand(); Value *VecBO = Builder.CreateBinOp(cast(&I)->getOpcode(), V0, V1); @@ -353,7 +357,6 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) { } // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC' ++NumShufOfBitcast; - IRBuilder<> Builder(&I); Value *CastV = Builder.CreateBitCast(V, DestTy); Value *Shuf = Builder.CreateShuffleVector(CastV, UndefValue::get(DestTy), NewMask); @@ -454,7 +457,6 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) { ++NumScalarBO; // For constant cases, extract the scalar element, this should constant fold. - IRBuilder<> Builder(&I); if (IsConst0) V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index)); if (IsConst1) @@ -498,6 +500,7 @@ bool VectorCombine::run() { for (Instruction &I : BB) { if (isa(I)) continue; + Builder.SetInsertPoint(&I); MadeChange |= foldExtractExtract(I); MadeChange |= foldBitcastShuf(I); MadeChange |= scalarizeBinopOrCmp(I); diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index a3280a8..1cceb48 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -41,11 +41,11 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32( -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y]] +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y:%.*]] +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SHIFT1]] +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[SHIFT2]] ; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 ; CHECK-NEXT: ret i32 [[X2Y210]] diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll index af33850..ac7ca1e 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll @@ -454,12 +454,12 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) { define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @PR34724( ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2 +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]] ; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] ; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3 ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1 -- 2.7.4