From 3e69871ab5a66fb55913a2a2f5e7f5b42899a4c9 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 29 Aug 2020 10:42:38 +0300 Subject: [PATCH] [InstCombine] Take 2: Perform trivial PHI CSE MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The original take was 6102310d814ad73eab60a88b21dd70874f7a056f, which taught InstSimplify to do that, which seemed better at time, since we got EarlyCSE support for free. However, it was proven that we can not do that there, the simplified-to PHI would not be reachable from the original PHI, and that is not something InstSimplify is allowed to do, as noted in the commit ed90f15efb40d26b5d3ead3bb8e9e284218e0186 that reverted it : > It appears to cause compilation non-determinism and caused stage3 mismatches. However InstCombine already does many different optimizations, so it should be a safe place to do it here. Note that we still can't just compare incoming values ranges, because there is no guarantee that these PHI's we'd simplify to were already re-visited and sorted. However coming up with a test is problematic. Effects on vanilla llvm test-suite + RawSpeed: ``` | statistic name | baseline | proposed | Δ | % | |%| | |----------------------------------------------------|-----------|-----------|-------:|---------:|---------:| | instcombine.NumPHICSEs | 0 | 22228 | 22228 | 0.00% | 0.00% | | asm-printer.EmittedInsts | 7942329 | 7942456 | 127 | 0.00% | 0.00% | | assembler.ObjectBytes | 254295632 | 254313792 | 18160 | 0.01% | 0.01% | | early-cse.NumCSE | 2183283 | 2183272 | -11 | 0.00% | 0.00% | | early-cse.NumSimplify | 550105 | 541842 | -8263 | -1.50% | 1.50% | | instcombine.NumAggregateReconstructionsSimplified | 73 | 4506 | 4433 | 6072.60% | 6072.60% | | instcombine.NumCombined | 3640311 | 3666911 | 26600 | 0.73% | 0.73% | | instcombine.NumDeadInst | 1778204 | 1783318 | 5114 | 0.29% | 0.29% | | instcount.NumCallInst | 1758395 | 1758804 | 409 | 0.02% | 0.02% | | instcount.NumInvokeInst | 59478 | 59502 | 24 | 0.04% | 0.04% | | instcount.NumPHIInst | 330557 | 330549 | -8 | 0.00% | 0.00% | | instcount.TotalBlocks | 1077138 | 1077221 | 83 | 0.01% | 0.01% | | instcount.TotalFuncs | 101442 | 101441 | -1 | 0.00% | 0.00% | | instcount.TotalInsts | 8831946 | 8832611 | 665 | 0.01% | 0.01% | | simplifycfg.NumInvokes | 4300 | 4410 | 110 | 2.56% | 2.56% | | simplifycfg.NumSimpl | 1019813 | 999740 | -20073 | -1.97% | 1.97% | ``` So it fires ~22k times, which is less than ~24k the take 1 did. It allows foldAggregateConstructionIntoAggregateReuse() to actually work after PHI-of-extractvalue folds did their thing. Previously SimplifyCFG would have done this PHI CSE, of all places. Additionally, allows some more `invoke`->`call` folds to happen (+110, +2.56%). All in all, expectedly, this catches less things overall, but all the motivational cases are still caught, so all good. --- llvm/lib/IR/Instruction.cpp | 28 ++++-- llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 20 +++- .../merging-multiple-stores-into-successor.ll | 5 +- .../phi-aware-aggregate-reconstruction.ll | 45 ++------- llvm/test/Transforms/InstCombine/phi-cse.ll | 18 ++-- .../InstCombine/phi-equal-incoming-pointers.ll | 101 ++++++--------------- llvm/test/Transforms/InstCombine/select.ll | 5 +- llvm/test/Transforms/LoopVectorize/reduction.ll | 3 +- 8 files changed, 88 insertions(+), 137 deletions(-) diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index bfbd801..f091425 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -483,17 +483,33 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const { if (getNumOperands() == 0 && I->getNumOperands() == 0) return haveSameSpecialState(this, I); - // We have two instructions of identical opcode and #operands. Check to see - // if all operands are the same. - if (!std::equal(op_begin(), op_end(), I->op_begin())) - return false; - + // PHI nodes are special. if (const PHINode *thisPHI = dyn_cast(this)) { const PHINode *otherPHI = cast(I); - return std::equal(thisPHI->block_begin(), thisPHI->block_end(), + // PHI nodes don't nessesairly have their operands in the same order, + // so we shouldn't just compare ranges of incoming blocks/values. + + // If both PHI's are in the same basic block, which is the most interesting + // case, we know they must have identical predecessor list, + // so we only need to check the incoming values. + if (thisPHI->getParent() == otherPHI->getParent()) { + return all_of(thisPHI->blocks(), [thisPHI, otherPHI](BasicBlock *PredBB) { + return thisPHI->getIncomingValueForBlock(PredBB) == + otherPHI->getIncomingValueForBlock(PredBB); + }); + } + + // Otherwise, let's just naively compare operands/blocks. + return std::equal(op_begin(), op_end(), I->op_begin()) && + std::equal(thisPHI->block_begin(), thisPHI->block_end(), otherPHI->block_begin()); } + // We have two instructions of identical opcode and #operands. Check to see + // if all operands are the same. + if (!std::equal(op_begin(), op_end(), I->op_begin())) + return false; + return haveSameSpecialState(this, I); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index d8510d5..b63e7bf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -34,6 +34,7 @@ STATISTIC(NumPHIsOfInsertValues, "Number of phi-of-insertvalue turned into insertvalue-of-phis"); STATISTIC(NumPHIsOfExtractValues, "Number of phi-of-extractvalue turned into extractvalue-of-phi"); +STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd"); /// The PHI arguments will be folded into a single operation with a PHI node /// as input. The debug location of the single operation will be the merged @@ -1407,7 +1408,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { // by other passes. Other passes shouldn't depend on this for correctness // however. PHINode *FirstPN = cast(PN.getParent()->begin()); - if (&PN != FirstPN) + if (&PN != FirstPN) { for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) { BasicBlock *BBA = PN.getIncomingBlock(i); BasicBlock *BBB = FirstPN->getIncomingBlock(i); @@ -1426,6 +1427,23 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { } } + // Is there an identical PHI node before this one in this basic block? + for (PHINode &Src : PN.getParent()->phis()) { + // Once we've reached the PHI node we've been asked about, stop looking. + if (&Src == &PN) + break; + // Note that even though we've just canonicalized this PHI, due to the + // worklist visitation order, there are no guarantess that *every* PHI + // has been canonicalized, so we can't just compare operands ranges. + if (!PN.isIdenticalToWhenDefined(&Src)) + continue; + // Just use that PHI instead then. + ++NumPHICSEs; + PN.replaceAllUsesWith(&Src); + return &PN; + } + } + // If this is an integer PHI and we know that it has an illegal type, see if // it is only used by trunc or trunc(lshr) operations. If so, we split the // PHI into the various pieces being extracted. This sort of thing is diff --git a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll index 5be341f..be00822 100644 --- a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll +++ b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -instcombine-infinite-loop-threshold=2 -S | FileCheck %s +; RUN: opt %s -instcombine -instcombine-infinite-loop-threshold=3 -S | FileCheck %s @var_7 = external global i8, align 1 @var_1 = external global i32, align 4 @@ -29,11 +29,10 @@ define void @_Z4testv() { ; CHECK-NEXT: br label [[BB12]] ; CHECK: bb12: ; CHECK-NEXT: [[STOREMERGE1:%.*]] = phi i32 [ [[I11]], [[BB10]] ], [ 1, [[BB9]] ] -; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ [[I11]], [[BB10]] ], [ 1, [[BB9]] ] ; CHECK-NEXT: store i32 [[STOREMERGE1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @arr_2, i64 0, i64 0), align 4 ; CHECK-NEXT: store i16 [[I4]], i16* getelementptr inbounds ([0 x i16], [0 x i16]* @arr_4, i64 0, i64 0), align 2 ; CHECK-NEXT: store i32 [[I8]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @arr_3, i64 0, i64 0), align 16 -; CHECK-NEXT: store i32 [[STOREMERGE]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @arr_2, i64 0, i64 1), align 4 +; CHECK-NEXT: store i32 [[STOREMERGE1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @arr_2, i64 0, i64 1), align 4 ; CHECK-NEXT: store i16 [[I4]], i16* getelementptr inbounds ([0 x i16], [0 x i16]* @arr_4, i64 0, i64 1), align 2 ; CHECK-NEXT: store i32 [[I8]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @arr_3, i64 0, i64 1), align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll index 866750c..0befa4b 100644 --- a/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll +++ b/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll @@ -25,13 +25,8 @@ define { i32, i32 } @test0({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 % ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[AGG_LEFT_PN:%.*]] = phi { i32, i32 } [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ] -; CHECK-NEXT: [[AGG_LEFT_PN1:%.*]] = phi { i32, i32 } [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ] -; CHECK-NEXT: [[I6:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN1]], 1 -; CHECK-NEXT: [[I5:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN]], 0 ; CHECK-NEXT: call void @baz() -; CHECK-NEXT: [[I7:%.*]] = insertvalue { i32, i32 } undef, i32 [[I5]], 0 -; CHECK-NEXT: [[I8:%.*]] = insertvalue { i32, i32 } [[I7]], i32 [[I6]], 1 -; CHECK-NEXT: ret { i32, i32 } [[I8]] +; CHECK-NEXT: ret { i32, i32 } [[AGG_LEFT_PN]] ; entry: br i1 %c, label %left, label %right @@ -162,19 +157,13 @@ define { i32, i32 } @test3({ i32, i32 } %agg_00, { i32, i32 } %agg_01, { i32, i3 ; CHECK-NEXT: br label [[BB0_MERGE]] ; CHECK: bb0.merge: ; CHECK-NEXT: [[AGG_00_PN:%.*]] = phi { i32, i32 } [ [[AGG_00:%.*]], [[BB00]] ], [ [[AGG_01:%.*]], [[BB01]] ] -; CHECK-NEXT: [[AGG_00_PN1:%.*]] = phi { i32, i32 } [ [[AGG_00]], [[BB00]] ], [ [[AGG_01]], [[BB01]] ] ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: bb10: ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[AGG_00_PN_PN:%.*]] = phi { i32, i32 } [ [[AGG_00_PN]], [[BB0_MERGE]] ], [ [[AGG_10:%.*]], [[BB10]] ] -; CHECK-NEXT: [[AGG_00_PN1_PN:%.*]] = phi { i32, i32 } [ [[AGG_00_PN1]], [[BB0_MERGE]] ], [ [[AGG_10]], [[BB10]] ] -; CHECK-NEXT: [[I9:%.*]] = extractvalue { i32, i32 } [[AGG_00_PN1_PN]], 1 -; CHECK-NEXT: [[I8:%.*]] = extractvalue { i32, i32 } [[AGG_00_PN_PN]], 0 ; CHECK-NEXT: call void @baz() -; CHECK-NEXT: [[I10:%.*]] = insertvalue { i32, i32 } undef, i32 [[I8]], 0 -; CHECK-NEXT: [[I11:%.*]] = insertvalue { i32, i32 } [[I10]], i32 [[I9]], 1 -; CHECK-NEXT: ret { i32, i32 } [[I11]] +; CHECK-NEXT: ret { i32, i32 } [[AGG_00_PN_PN]] ; entry: br i1 %c0, label %bb0.dispatch, label %bb10 @@ -277,17 +266,12 @@ define { i32, i32 } @test5({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 % ; CHECK-NEXT: call void @bar() ; CHECK-NEXT: br label [[MIDDLE]] ; CHECK: middle: -; CHECK-NEXT: [[AGG_LEFT_PN:%.*]] = phi { i32, i32 } [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ], [ [[I8:%.*]], [[MIDDLE]] ] -; CHECK-NEXT: [[AGG_LEFT_PN1:%.*]] = phi { i32, i32 } [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ], [ [[I8]], [[MIDDLE]] ] -; CHECK-NEXT: [[I6:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN1]], 1 -; CHECK-NEXT: [[I5:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN]], 0 +; CHECK-NEXT: [[AGG_LEFT_PN:%.*]] = phi { i32, i32 } [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ], [ [[AGG_LEFT_PN]], [[MIDDLE]] ] ; CHECK-NEXT: call void @baz() -; CHECK-NEXT: [[I7:%.*]] = insertvalue { i32, i32 } undef, i32 [[I5]], 0 -; CHECK-NEXT: [[I8]] = insertvalue { i32, i32 } [[I7]], i32 [[I6]], 1 ; CHECK-NEXT: [[C1:%.*]] = call i1 @geni1() ; CHECK-NEXT: br i1 [[C1]], label [[END:%.*]], label [[MIDDLE]] ; CHECK: end: -; CHECK-NEXT: ret { i32, i32 } [[I8]] +; CHECK-NEXT: ret { i32, i32 } [[AGG_LEFT_PN]] ; entry: br i1 %c0, label %left, label %right @@ -332,19 +316,14 @@ define { i32, i32 } @test6({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 % ; CHECK-NEXT: br label [[MERGE]] ; CHECK: merge: ; CHECK-NEXT: [[AGG_LEFT_PN:%.*]] = phi { i32, i32 } [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ] -; CHECK-NEXT: [[AGG_LEFT_PN1:%.*]] = phi { i32, i32 } [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ] ; CHECK-NEXT: call void @baz() ; CHECK-NEXT: br i1 [[C1:%.*]], label [[END:%.*]], label [[PASSTHROUGH:%.*]] ; CHECK: passthrough: ; CHECK-NEXT: call void @qux() ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[I6:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN1]], 1 -; CHECK-NEXT: [[I5:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN]], 0 ; CHECK-NEXT: call void @quux() -; CHECK-NEXT: [[I7:%.*]] = insertvalue { i32, i32 } undef, i32 [[I5]], 0 -; CHECK-NEXT: [[I8:%.*]] = insertvalue { i32, i32 } [[I7]], i32 [[I6]], 1 -; CHECK-NEXT: ret { i32, i32 } [[I8]] +; CHECK-NEXT: ret { i32, i32 } [[AGG_LEFT_PN]] ; entry: br i1 %c0, label %left, label %right @@ -451,13 +430,8 @@ define { i32, i32 } @test8({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 % ; CHECK-NEXT: unreachable ; CHECK: end: ; CHECK-NEXT: [[AGG_LEFT_PN:%.*]] = phi { i32, i32 } [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ] -; CHECK-NEXT: [[AGG_LEFT_PN1:%.*]] = phi { i32, i32 } [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ] -; CHECK-NEXT: [[I6:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN1]], 1 -; CHECK-NEXT: [[I5:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN]], 0 ; CHECK-NEXT: call void @baz() -; CHECK-NEXT: [[I7:%.*]] = insertvalue { i32, i32 } undef, i32 [[I5]], 0 -; CHECK-NEXT: [[I8:%.*]] = insertvalue { i32, i32 } [[I7]], i32 [[I6]], 1 -; CHECK-NEXT: ret { i32, i32 } [[I8]] +; CHECK-NEXT: ret { i32, i32 } [[AGG_LEFT_PN]] ; entry: br i1 %c, label %left, label %right @@ -505,13 +479,8 @@ define { i32, i32 } @test9({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 % ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[AGG_LEFT_PN:%.*]] = phi { i32, i32 } [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ] -; CHECK-NEXT: [[AGG_LEFT_PN1:%.*]] = phi { i32, i32 } [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ] -; CHECK-NEXT: [[I7:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN1]], 1 -; CHECK-NEXT: [[I0_PN:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN]], 0 -; CHECK-NEXT: [[I6:%.*]] = insertvalue { i32, i32 } undef, i32 [[I0_PN]], 0 ; CHECK-NEXT: call void @baz() -; CHECK-NEXT: [[I8:%.*]] = insertvalue { i32, i32 } [[I6]], i32 [[I7]], 1 -; CHECK-NEXT: ret { i32, i32 } [[I8]] +; CHECK-NEXT: ret { i32, i32 } [[AGG_LEFT_PN]] ; entry: br i1 %c, label %left, label %right diff --git a/llvm/test/Transforms/InstCombine/phi-cse.ll b/llvm/test/Transforms/InstCombine/phi-cse.ll index e1ee186..f369788 100644 --- a/llvm/test/Transforms/InstCombine/phi-cse.ll +++ b/llvm/test/Transforms/InstCombine/phi-cse.ll @@ -12,9 +12,8 @@ define void @test0(i32 %v0, i32 %v1, i1 %c, i32* %d0, i32* %d1) { ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[V0:%.*]], [[B0]] ], [ [[V1:%.*]], [[B1]] ] -; CHECK-NEXT: [[I1:%.*]] = phi i32 [ [[V0]], [[B0]] ], [ [[V1]], [[B1]] ] ; CHECK-NEXT: store i32 [[I0]], i32* [[D0:%.*]], align 4 -; CHECK-NEXT: store i32 [[I1]], i32* [[D1:%.*]], align 4 +; CHECK-NEXT: store i32 [[I0]], i32* [[D1:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -45,9 +44,8 @@ define void @test1(i32 %v0, i32 %v1, i1 %c, i32* %d0, i32* %d1) { ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[V0:%.*]], [[B0]] ], [ [[V1:%.*]], [[B1]] ] -; CHECK-NEXT: [[I1:%.*]] = phi i32 [ [[V0]], [[B0]] ], [ [[V1]], [[B1]] ] ; CHECK-NEXT: store i32 [[I0]], i32* [[D0:%.*]], align 4 -; CHECK-NEXT: store i32 [[I1]], i32* [[D1:%.*]], align 4 +; CHECK-NEXT: store i32 [[I0]], i32* [[D1:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -140,9 +138,8 @@ define void @negative_test4(i32 %v0, i32 %v1, i1 %c, i32* %d0, i32* %d1) { ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[V0:%.*]], [[B0]] ], [ [[V1:%.*]], [[B1]] ] -; CHECK-NEXT: [[I1:%.*]] = phi i32 [ [[V0]], [[B0]] ], [ [[V1]], [[B1]] ] ; CHECK-NEXT: store i32 [[I0]], i32* [[D0:%.*]], align 4 -; CHECK-NEXT: store i32 [[I1]], i32* [[D1:%.*]], align 4 +; CHECK-NEXT: store i32 [[I0]], i32* [[D1:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -233,9 +230,8 @@ define void @test7(i32 %v0, i32 %v1, i16 %v2, i16 %v3, i1 %c, i32* %d0, i32* %d1 ; CHECK: end: ; CHECK-NEXT: [[IBAD:%.*]] = phi i16 [ [[V2:%.*]], [[B0]] ], [ [[V3:%.*]], [[B1]] ] ; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[V0:%.*]], [[B0]] ], [ [[V1:%.*]], [[B1]] ] -; CHECK-NEXT: [[I1:%.*]] = phi i32 [ [[V0]], [[B0]] ], [ [[V1]], [[B1]] ] ; CHECK-NEXT: store i32 [[I0]], i32* [[D0:%.*]], align 4 -; CHECK-NEXT: store i32 [[I1]], i32* [[D1:%.*]], align 4 +; CHECK-NEXT: store i32 [[I0]], i32* [[D1:%.*]], align 4 ; CHECK-NEXT: store i16 [[IBAD]], i16* [[D2:%.*]], align 2 ; CHECK-NEXT: ret void ; @@ -268,9 +264,8 @@ define void @test8(i32 %v0, i32 %v1, i16 %v2, i16 %v3, i1 %c, i32* %d0, i32* %d1 ; CHECK: end: ; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[V0:%.*]], [[B0]] ], [ [[V1:%.*]], [[B1]] ] ; CHECK-NEXT: [[IBAD:%.*]] = phi i16 [ [[V2:%.*]], [[B0]] ], [ [[V3:%.*]], [[B1]] ] -; CHECK-NEXT: [[I1:%.*]] = phi i32 [ [[V0]], [[B0]] ], [ [[V1]], [[B1]] ] ; CHECK-NEXT: store i32 [[I0]], i32* [[D0:%.*]], align 4 -; CHECK-NEXT: store i32 [[I1]], i32* [[D1:%.*]], align 4 +; CHECK-NEXT: store i32 [[I0]], i32* [[D1:%.*]], align 4 ; CHECK-NEXT: store i16 [[IBAD]], i16* [[D2:%.*]], align 2 ; CHECK-NEXT: ret void ; @@ -302,10 +297,9 @@ define void @test9(i32 %v0, i32 %v1, i16 %v2, i16 %v3, i1 %c, i32* %d0, i32* %d1 ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[V0:%.*]], [[B0]] ], [ [[V1:%.*]], [[B1]] ] -; CHECK-NEXT: [[I1:%.*]] = phi i32 [ [[V0]], [[B0]] ], [ [[V1]], [[B1]] ] ; CHECK-NEXT: [[IBAD:%.*]] = phi i16 [ [[V2:%.*]], [[B0]] ], [ [[V3:%.*]], [[B1]] ] ; CHECK-NEXT: store i32 [[I0]], i32* [[D0:%.*]], align 4 -; CHECK-NEXT: store i32 [[I1]], i32* [[D1:%.*]], align 4 +; CHECK-NEXT: store i32 [[I0]], i32* [[D1:%.*]], align 4 ; CHECK-NEXT: store i16 [[IBAD]], i16* [[D2:%.*]], align 2 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll b/llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll index db5402b..4e37dfc 100644 --- a/llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll +++ b/llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll @@ -15,17 +15,13 @@ define i32 @test_gep_and_bitcast(i1 %cond, i1 %cond2) { ; ALL-NEXT: [[OBJ:%.*]] = call i8* @get_ptr.i8() ; ALL-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; ALL: bb1: -; ALL-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 -; ALL-NEXT: [[PTR1_TYPED:%.*]] = bitcast i8* [[PTR1]] to i32* ; ALL-NEXT: br label [[EXIT:%.*]] ; ALL: bb2: -; ALL-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 -; ALL-NEXT: [[PTR2_TYPED:%.*]] = bitcast i8* [[PTR2]] to i32* ; ALL-NEXT: br label [[EXIT]] ; ALL: exit: -; ALL-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; ALL-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 +; ALL-NEXT: [[PTR_TYPED_IN:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 +; ALL-NEXT: [[PTR_TYPED:%.*]] = bitcast i8* [[PTR_TYPED_IN]] to i32* +; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 ; ALL-NEXT: ret i32 [[RES]] @@ -60,17 +56,13 @@ define i32 @test_gep_and_bitcast_arg(i8* %obj, i1 %cond, i1 %cond2) { ; ALL-NEXT: entry: ; ALL-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; ALL: bb1: -; ALL-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ:%.*]], i64 16 -; ALL-NEXT: [[PTR1_TYPED:%.*]] = bitcast i8* [[PTR1]] to i32* ; ALL-NEXT: br label [[EXIT:%.*]] ; ALL: bb2: -; ALL-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 -; ALL-NEXT: [[PTR2_TYPED:%.*]] = bitcast i8* [[PTR2]] to i32* ; ALL-NEXT: br label [[EXIT]] ; ALL: exit: -; ALL-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; ALL-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 +; ALL-NEXT: [[PTR_TYPED_IN:%.*]] = getelementptr inbounds i8, i8* [[OBJ:%.*]], i64 16 +; ALL-NEXT: [[PTR_TYPED:%.*]] = bitcast i8* [[PTR_TYPED_IN]] to i32* +; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 ; ALL-NEXT: ret i32 [[RES]] @@ -116,17 +108,13 @@ define i32 @test_gep_and_bitcast_phi(i1 %cond, i1 %cond2, i1 %cond3) { ; ALL-NEXT: call void @foo.i8(i8* [[ANOTHER_PHI]]) ; ALL-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]] ; ALL: bb3: -; ALL-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 -; ALL-NEXT: [[PTR1_TYPED:%.*]] = bitcast i8* [[PTR1]] to i32* ; ALL-NEXT: br label [[EXIT:%.*]] ; ALL: bb4: -; ALL-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 -; ALL-NEXT: [[PTR2_TYPED:%.*]] = bitcast i8* [[PTR2]] to i32* ; ALL-NEXT: br label [[EXIT]] ; ALL: exit: -; ALL-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB3]] ], [ [[PTR2_TYPED]], [[BB4]] ] -; ALL-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB3]] ], [ [[PTR2_TYPED]], [[BB4]] ] -; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 +; ALL-NEXT: [[PTR_TYPED_IN:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 +; ALL-NEXT: [[PTR_TYPED:%.*]] = bitcast i8* [[PTR_TYPED_IN]] to i32* +; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: [[RES:%.*]] = select i1 [[COND3:%.*]], i32 [[RES_PHI]], i32 1 ; ALL-NEXT: ret i32 [[RES]] @@ -176,15 +164,12 @@ define i32 @test_gep_i32ptr(i1 %cond, i1 %cond2) { ; ALL-NEXT: [[OBJ:%.*]] = call i32* @get_ptr.i32() ; ALL-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; ALL: bb1: -; ALL-NEXT: [[PTR1_TYPED:%.*]] = getelementptr inbounds i32, i32* [[OBJ]], i64 16 ; ALL-NEXT: br label [[EXIT:%.*]] ; ALL: bb2: -; ALL-NEXT: [[PTR2_TYPED:%.*]] = getelementptr inbounds i32, i32* [[OBJ]], i64 16 ; ALL-NEXT: br label [[EXIT]] ; ALL: exit: -; ALL-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; ALL-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 +; ALL-NEXT: [[PTR_TYPED:%.*]] = getelementptr inbounds i32, i32* [[OBJ]], i64 16 +; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 ; ALL-NEXT: ret i32 [[RES]] @@ -218,17 +203,13 @@ define i32 @test_gep_and_bitcast_gep_base_ptr(i1 %cond, i1 %cond2) { ; ALL-NEXT: [[OBJ0:%.*]] = call i8* @get_ptr.i8() ; ALL-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; ALL: bb1: -; ALL-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ0]], i64 32 -; ALL-NEXT: [[PTR1_TYPED:%.*]] = bitcast i8* [[PTR1]] to i32* ; ALL-NEXT: br label [[EXIT:%.*]] ; ALL: bb2: -; ALL-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[OBJ0]], i64 32 -; ALL-NEXT: [[PTR2_TYPED:%.*]] = bitcast i8* [[PTR2]] to i32* ; ALL-NEXT: br label [[EXIT]] ; ALL: exit: -; ALL-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; ALL-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 +; ALL-NEXT: [[PTR_TYPED_IN:%.*]] = getelementptr inbounds i8, i8* [[OBJ0]], i64 32 +; ALL-NEXT: [[PTR_TYPED:%.*]] = bitcast i8* [[PTR_TYPED_IN]] to i32* +; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 ; ALL-NEXT: ret i32 [[RES]] @@ -260,37 +241,19 @@ exit: } define i32 @test_gep_and_bitcast_same_bb(i1 %cond, i1 %cond2) { -; INSTCOMBINE-LABEL: @test_gep_and_bitcast_same_bb( -; INSTCOMBINE-NEXT: entry: -; INSTCOMBINE-NEXT: [[OBJ:%.*]] = call i8* @get_ptr.i8() -; INSTCOMBINE-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 -; INSTCOMBINE-NEXT: [[PTR1_TYPED:%.*]] = bitcast i8* [[PTR1]] to i32* -; INSTCOMBINE-NEXT: br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[BB2:%.*]] -; INSTCOMBINE: bb2: -; INSTCOMBINE-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 -; INSTCOMBINE-NEXT: [[PTR2_TYPED:%.*]] = bitcast i8* [[PTR2]] to i32* -; INSTCOMBINE-NEXT: br label [[EXIT]] -; INSTCOMBINE: exit: -; INSTCOMBINE-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[ENTRY:%.*]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; INSTCOMBINE-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[ENTRY]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; INSTCOMBINE-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 -; INSTCOMBINE-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 -; INSTCOMBINE-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 -; INSTCOMBINE-NEXT: ret i32 [[RES]] -; -; INSTCOMBINEGVN-LABEL: @test_gep_and_bitcast_same_bb( -; INSTCOMBINEGVN-NEXT: entry: -; INSTCOMBINEGVN-NEXT: [[OBJ:%.*]] = call i8* @get_ptr.i8() -; INSTCOMBINEGVN-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 -; INSTCOMBINEGVN-NEXT: [[PTR1_TYPED:%.*]] = bitcast i8* [[PTR1]] to i32* -; INSTCOMBINEGVN-NEXT: br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[BB2:%.*]] -; INSTCOMBINEGVN: bb2: -; INSTCOMBINEGVN-NEXT: br label [[EXIT]] -; INSTCOMBINEGVN: exit: -; INSTCOMBINEGVN-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR1_TYPED]], align 4 -; INSTCOMBINEGVN-NEXT: store i32 1, i32* [[PTR1_TYPED]], align 4 -; INSTCOMBINEGVN-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 -; INSTCOMBINEGVN-NEXT: ret i32 [[RES]] +; ALL-LABEL: @test_gep_and_bitcast_same_bb( +; ALL-NEXT: entry: +; ALL-NEXT: [[OBJ:%.*]] = call i8* @get_ptr.i8() +; ALL-NEXT: br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[BB2:%.*]] +; ALL: bb2: +; ALL-NEXT: br label [[EXIT]] +; ALL: exit: +; ALL-NEXT: [[PTR_TYPED_IN:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 +; ALL-NEXT: [[PTR_TYPED:%.*]] = bitcast i8* [[PTR_TYPED_IN]] to i32* +; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 +; ALL-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 +; ALL-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 +; ALL-NEXT: ret i32 [[RES]] ; entry: %obj = call i8* @get_ptr.i8() @@ -328,8 +291,7 @@ define i32 @test_gep_and_bitcast_same_bb_and_extra_use(i1 %cond, i1 %cond2) { ; INSTCOMBINE-NEXT: br label [[EXIT]] ; INSTCOMBINE: exit: ; INSTCOMBINE-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[ENTRY:%.*]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; INSTCOMBINE-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[ENTRY]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; INSTCOMBINE-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 +; INSTCOMBINE-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 ; INSTCOMBINE-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 ; INSTCOMBINE-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 ; INSTCOMBINE-NEXT: ret i32 [[RES]] @@ -378,15 +340,12 @@ define i8 @test_gep(i1 %cond, i1 %cond2) { ; ALL-NEXT: [[OBJ:%.*]] = call i8* @get_ptr.i8() ; ALL-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; ALL: bb1: -; ALL-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 ; ALL-NEXT: br label [[EXIT:%.*]] ; ALL: bb2: -; ALL-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 ; ALL-NEXT: br label [[EXIT]] ; ALL: exit: -; ALL-NEXT: [[PTR_TYPED:%.*]] = phi i8* [ [[PTR1]], [[BB1]] ], [ [[PTR2]], [[BB2]] ] -; ALL-NEXT: [[RES_PHI_IN:%.*]] = phi i8* [ [[PTR1]], [[BB1]] ], [ [[PTR2]], [[BB2]] ] -; ALL-NEXT: [[RES_PHI:%.*]] = load i8, i8* [[RES_PHI_IN]], align 1 +; ALL-NEXT: [[PTR_TYPED:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 +; ALL-NEXT: [[RES_PHI:%.*]] = load i8, i8* [[PTR_TYPED]], align 1 ; ALL-NEXT: store i8 1, i8* [[PTR_TYPED]], align 1 ; ALL-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i8 [[RES_PHI]], i8 1 ; ALL-NEXT: ret i8 [[RES]] diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 73e1e4af..aa5472e 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -1926,10 +1926,7 @@ define i32 @select_dominance_chain(i1 %cond, i32 %x, i32 %y) { ; CHECK-NEXT: br label [[MERGE_3]] ; CHECK: merge.3: ; CHECK-NEXT: [[S_3:%.*]] = phi i32 [ [[Y:%.*]], [[IF_FALSE_3]] ], [ [[X:%.*]], [[IF_TRUE_3]] ] -; CHECK-NEXT: [[S_2:%.*]] = phi i32 [ [[Y]], [[IF_FALSE_3]] ], [ [[X]], [[IF_TRUE_3]] ] -; CHECK-NEXT: [[S_1:%.*]] = phi i32 [ [[Y]], [[IF_FALSE_3]] ], [ [[X]], [[IF_TRUE_3]] ] -; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[S_1]], [[S_2]] -; CHECK-NEXT: [[SUM_2:%.*]] = add i32 [[SUM_1]], [[S_3]] +; CHECK-NEXT: [[SUM_2:%.*]] = mul i32 [[S_3]], 3 ; CHECK-NEXT: ret i32 [[SUM_2]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index 4599899..33bd3d1 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -504,7 +504,6 @@ exit: ;CHECK: add <4 x i32> ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0 ;CHECK: %sum.lcssa = phi i32 [ %[[SCALAR:.*]], %.lr.ph ], [ %[[VECTOR:.*]], %middle.block ] -;CHECK: %sum.copy = phi i32 [ %[[SCALAR]], %.lr.ph ], [ %[[VECTOR]], %middle.block ] ;CHECK: ret i32 define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) { %1 = icmp sgt i32 %n, 0 @@ -544,7 +543,7 @@ end: ; variable. We cannot vectorize this. ; CHECK-LABEL: reduction_reset( ; CHECK-NOT: <4 x i32> -define void @reduction_reset(i32 %N, i32* nocapture readonly %arrayA, i32* nocapture %arrayB) { +define void @reduction_reset(i32 %N, i32* nocapture readonly %arrayA, i32* nocapture %arrayB) { entry: %c4 = icmp sgt i32 %N, 0 br i1 %c4, label %.lr.ph.preheader, label %._crit_edge -- 2.7.4