From bdaa3f86a040b138c58de41d73d35b76fdec1380 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 29 Aug 2020 16:00:07 +0300 Subject: [PATCH] Revert "[InstCombine] Take 2: Perform trivial PHI CSE" While the original variant with doing this in InstSimplify (rightfully) caused questions and ultimately was detected to be a culprit of stage2-stage3 mismatch, it was expected that InstCombine-based implementation would be fine. But apparently it's not, as http://lab.llvm.org:8011/builders/clang-with-thin-lto-ubuntu/builds/24095/steps/compare-compilers/logs/stdio suggests. Which suggests that somewhere in InstCombine there is a loop over nondeterministically sorted container, which causes different worklist ordering. This reverts commit 3e69871ab5a66fb55913a2a2f5e7f5b42899a4c9. --- llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp | 19 +--- .../merging-multiple-stores-into-successor.ll | 5 +- .../phi-aware-aggregate-reconstruction.ll | 45 +++++++-- llvm/test/Transforms/InstCombine/phi-cse.ll | 18 ++-- .../InstCombine/phi-equal-incoming-pointers.ll | 101 +++++++++++++++------ llvm/test/Transforms/InstCombine/select.ll | 6 +- llvm/test/Transforms/LoopVectorize/reduction.ll | 3 +- 7 files changed, 132 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index 4a7e50a..d8510d5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -34,7 +34,6 @@ STATISTIC(NumPHIsOfInsertValues, "Number of phi-of-insertvalue turned into insertvalue-of-phis"); STATISTIC(NumPHIsOfExtractValues, "Number of phi-of-extractvalue turned into extractvalue-of-phi"); -STATISTIC(NumPHICSEs, "Number of PHI's that got CSE'd"); /// The PHI arguments will be folded into a single operation with a PHI node /// as input. The debug location of the single operation will be the merged @@ -1408,7 +1407,7 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { // by other passes. Other passes shouldn't depend on this for correctness // however. PHINode *FirstPN = cast(PN.getParent()->begin()); - if (&PN != FirstPN) { + if (&PN != FirstPN) for (unsigned i = 0, e = FirstPN->getNumIncomingValues(); i != e; ++i) { BasicBlock *BBA = PN.getIncomingBlock(i); BasicBlock *BBB = FirstPN->getIncomingBlock(i); @@ -1427,22 +1426,6 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) { } } - // Is there an identical PHI node before this one in this basic block? - for (PHINode &Src : PN.getParent()->phis()) { - // Once we've reached the PHI node we've been asked about, stop looking. - if (&Src == &PN) - break; - // Note that even though we've just canonicalized this PHI, due to the - // worklist visitation order, there are no guarantess that *every* PHI - // has been canonicalized, so we can't just compare operands ranges. - if (!PN.isIdenticalToWhenDefined(&Src)) - continue; - // Just use that PHI instead then. - ++NumPHICSEs; - return replaceInstUsesWith(PN, &Src); - } - } - // If this is an integer PHI and we know that it has an illegal type, see if // it is only used by trunc or trunc(lshr) operations. If so, we split the // PHI into the various pieces being extracted. This sort of thing is diff --git a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll index be00822..5be341f 100644 --- a/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll +++ b/llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -instcombine -instcombine-infinite-loop-threshold=3 -S | FileCheck %s +; RUN: opt %s -instcombine -instcombine-infinite-loop-threshold=2 -S | FileCheck %s @var_7 = external global i8, align 1 @var_1 = external global i32, align 4 @@ -29,10 +29,11 @@ define void @_Z4testv() { ; CHECK-NEXT: br label [[BB12]] ; CHECK: bb12: ; CHECK-NEXT: [[STOREMERGE1:%.*]] = phi i32 [ [[I11]], [[BB10]] ], [ 1, [[BB9]] ] +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ [[I11]], [[BB10]] ], [ 1, [[BB9]] ] ; CHECK-NEXT: store i32 [[STOREMERGE1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @arr_2, i64 0, i64 0), align 4 ; CHECK-NEXT: store i16 [[I4]], i16* getelementptr inbounds ([0 x i16], [0 x i16]* @arr_4, i64 0, i64 0), align 2 ; CHECK-NEXT: store i32 [[I8]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @arr_3, i64 0, i64 0), align 16 -; CHECK-NEXT: store i32 [[STOREMERGE1]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @arr_2, i64 0, i64 1), align 4 +; CHECK-NEXT: store i32 [[STOREMERGE]], i32* getelementptr inbounds ([0 x i32], [0 x i32]* @arr_2, i64 0, i64 1), align 4 ; CHECK-NEXT: store i16 [[I4]], i16* getelementptr inbounds ([0 x i16], [0 x i16]* @arr_4, i64 0, i64 1), align 2 ; CHECK-NEXT: store i32 [[I8]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @arr_3, i64 0, i64 1), align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll b/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll index 0befa4b..866750c 100644 --- a/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll +++ b/llvm/test/Transforms/InstCombine/phi-aware-aggregate-reconstruction.ll @@ -25,8 +25,13 @@ define { i32, i32 } @test0({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 % ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[AGG_LEFT_PN:%.*]] = phi { i32, i32 } [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ] +; CHECK-NEXT: [[AGG_LEFT_PN1:%.*]] = phi { i32, i32 } [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ] +; CHECK-NEXT: [[I6:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN1]], 1 +; CHECK-NEXT: [[I5:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN]], 0 ; CHECK-NEXT: call void @baz() -; CHECK-NEXT: ret { i32, i32 } [[AGG_LEFT_PN]] +; CHECK-NEXT: [[I7:%.*]] = insertvalue { i32, i32 } undef, i32 [[I5]], 0 +; CHECK-NEXT: [[I8:%.*]] = insertvalue { i32, i32 } [[I7]], i32 [[I6]], 1 +; CHECK-NEXT: ret { i32, i32 } [[I8]] ; entry: br i1 %c, label %left, label %right @@ -157,13 +162,19 @@ define { i32, i32 } @test3({ i32, i32 } %agg_00, { i32, i32 } %agg_01, { i32, i3 ; CHECK-NEXT: br label [[BB0_MERGE]] ; CHECK: bb0.merge: ; CHECK-NEXT: [[AGG_00_PN:%.*]] = phi { i32, i32 } [ [[AGG_00:%.*]], [[BB00]] ], [ [[AGG_01:%.*]], [[BB01]] ] +; CHECK-NEXT: [[AGG_00_PN1:%.*]] = phi { i32, i32 } [ [[AGG_00]], [[BB00]] ], [ [[AGG_01]], [[BB01]] ] ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: bb10: ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[AGG_00_PN_PN:%.*]] = phi { i32, i32 } [ [[AGG_00_PN]], [[BB0_MERGE]] ], [ [[AGG_10:%.*]], [[BB10]] ] +; CHECK-NEXT: [[AGG_00_PN1_PN:%.*]] = phi { i32, i32 } [ [[AGG_00_PN1]], [[BB0_MERGE]] ], [ [[AGG_10]], [[BB10]] ] +; CHECK-NEXT: [[I9:%.*]] = extractvalue { i32, i32 } [[AGG_00_PN1_PN]], 1 +; CHECK-NEXT: [[I8:%.*]] = extractvalue { i32, i32 } [[AGG_00_PN_PN]], 0 ; CHECK-NEXT: call void @baz() -; CHECK-NEXT: ret { i32, i32 } [[AGG_00_PN_PN]] +; CHECK-NEXT: [[I10:%.*]] = insertvalue { i32, i32 } undef, i32 [[I8]], 0 +; CHECK-NEXT: [[I11:%.*]] = insertvalue { i32, i32 } [[I10]], i32 [[I9]], 1 +; CHECK-NEXT: ret { i32, i32 } [[I11]] ; entry: br i1 %c0, label %bb0.dispatch, label %bb10 @@ -266,12 +277,17 @@ define { i32, i32 } @test5({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 % ; CHECK-NEXT: call void @bar() ; CHECK-NEXT: br label [[MIDDLE]] ; CHECK: middle: -; CHECK-NEXT: [[AGG_LEFT_PN:%.*]] = phi { i32, i32 } [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ], [ [[AGG_LEFT_PN]], [[MIDDLE]] ] +; CHECK-NEXT: [[AGG_LEFT_PN:%.*]] = phi { i32, i32 } [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ], [ [[I8:%.*]], [[MIDDLE]] ] +; CHECK-NEXT: [[AGG_LEFT_PN1:%.*]] = phi { i32, i32 } [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ], [ [[I8]], [[MIDDLE]] ] +; CHECK-NEXT: [[I6:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN1]], 1 +; CHECK-NEXT: [[I5:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN]], 0 ; CHECK-NEXT: call void @baz() +; CHECK-NEXT: [[I7:%.*]] = insertvalue { i32, i32 } undef, i32 [[I5]], 0 +; CHECK-NEXT: [[I8]] = insertvalue { i32, i32 } [[I7]], i32 [[I6]], 1 ; CHECK-NEXT: [[C1:%.*]] = call i1 @geni1() ; CHECK-NEXT: br i1 [[C1]], label [[END:%.*]], label [[MIDDLE]] ; CHECK: end: -; CHECK-NEXT: ret { i32, i32 } [[AGG_LEFT_PN]] +; CHECK-NEXT: ret { i32, i32 } [[I8]] ; entry: br i1 %c0, label %left, label %right @@ -316,14 +332,19 @@ define { i32, i32 } @test6({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 % ; CHECK-NEXT: br label [[MERGE]] ; CHECK: merge: ; CHECK-NEXT: [[AGG_LEFT_PN:%.*]] = phi { i32, i32 } [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ] +; CHECK-NEXT: [[AGG_LEFT_PN1:%.*]] = phi { i32, i32 } [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ] ; CHECK-NEXT: call void @baz() ; CHECK-NEXT: br i1 [[C1:%.*]], label [[END:%.*]], label [[PASSTHROUGH:%.*]] ; CHECK: passthrough: ; CHECK-NEXT: call void @qux() ; CHECK-NEXT: br label [[END]] ; CHECK: end: +; CHECK-NEXT: [[I6:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN1]], 1 +; CHECK-NEXT: [[I5:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN]], 0 ; CHECK-NEXT: call void @quux() -; CHECK-NEXT: ret { i32, i32 } [[AGG_LEFT_PN]] +; CHECK-NEXT: [[I7:%.*]] = insertvalue { i32, i32 } undef, i32 [[I5]], 0 +; CHECK-NEXT: [[I8:%.*]] = insertvalue { i32, i32 } [[I7]], i32 [[I6]], 1 +; CHECK-NEXT: ret { i32, i32 } [[I8]] ; entry: br i1 %c0, label %left, label %right @@ -430,8 +451,13 @@ define { i32, i32 } @test8({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 % ; CHECK-NEXT: unreachable ; CHECK: end: ; CHECK-NEXT: [[AGG_LEFT_PN:%.*]] = phi { i32, i32 } [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ] +; CHECK-NEXT: [[AGG_LEFT_PN1:%.*]] = phi { i32, i32 } [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ] +; CHECK-NEXT: [[I6:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN1]], 1 +; CHECK-NEXT: [[I5:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN]], 0 ; CHECK-NEXT: call void @baz() -; CHECK-NEXT: ret { i32, i32 } [[AGG_LEFT_PN]] +; CHECK-NEXT: [[I7:%.*]] = insertvalue { i32, i32 } undef, i32 [[I5]], 0 +; CHECK-NEXT: [[I8:%.*]] = insertvalue { i32, i32 } [[I7]], i32 [[I6]], 1 +; CHECK-NEXT: ret { i32, i32 } [[I8]] ; entry: br i1 %c, label %left, label %right @@ -479,8 +505,13 @@ define { i32, i32 } @test9({ i32, i32 } %agg_left, { i32, i32 } %agg_right, i1 % ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[AGG_LEFT_PN:%.*]] = phi { i32, i32 } [ [[AGG_LEFT:%.*]], [[LEFT]] ], [ [[AGG_RIGHT:%.*]], [[RIGHT]] ] +; CHECK-NEXT: [[AGG_LEFT_PN1:%.*]] = phi { i32, i32 } [ [[AGG_LEFT]], [[LEFT]] ], [ [[AGG_RIGHT]], [[RIGHT]] ] +; CHECK-NEXT: [[I7:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN1]], 1 +; CHECK-NEXT: [[I0_PN:%.*]] = extractvalue { i32, i32 } [[AGG_LEFT_PN]], 0 +; CHECK-NEXT: [[I6:%.*]] = insertvalue { i32, i32 } undef, i32 [[I0_PN]], 0 ; CHECK-NEXT: call void @baz() -; CHECK-NEXT: ret { i32, i32 } [[AGG_LEFT_PN]] +; CHECK-NEXT: [[I8:%.*]] = insertvalue { i32, i32 } [[I6]], i32 [[I7]], 1 +; CHECK-NEXT: ret { i32, i32 } [[I8]] ; entry: br i1 %c, label %left, label %right diff --git a/llvm/test/Transforms/InstCombine/phi-cse.ll b/llvm/test/Transforms/InstCombine/phi-cse.ll index f369788..e1ee186 100644 --- a/llvm/test/Transforms/InstCombine/phi-cse.ll +++ b/llvm/test/Transforms/InstCombine/phi-cse.ll @@ -12,8 +12,9 @@ define void @test0(i32 %v0, i32 %v1, i1 %c, i32* %d0, i32* %d1) { ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[V0:%.*]], [[B0]] ], [ [[V1:%.*]], [[B1]] ] +; CHECK-NEXT: [[I1:%.*]] = phi i32 [ [[V0]], [[B0]] ], [ [[V1]], [[B1]] ] ; CHECK-NEXT: store i32 [[I0]], i32* [[D0:%.*]], align 4 -; CHECK-NEXT: store i32 [[I0]], i32* [[D1:%.*]], align 4 +; CHECK-NEXT: store i32 [[I1]], i32* [[D1:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -44,8 +45,9 @@ define void @test1(i32 %v0, i32 %v1, i1 %c, i32* %d0, i32* %d1) { ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[V0:%.*]], [[B0]] ], [ [[V1:%.*]], [[B1]] ] +; CHECK-NEXT: [[I1:%.*]] = phi i32 [ [[V0]], [[B0]] ], [ [[V1]], [[B1]] ] ; CHECK-NEXT: store i32 [[I0]], i32* [[D0:%.*]], align 4 -; CHECK-NEXT: store i32 [[I0]], i32* [[D1:%.*]], align 4 +; CHECK-NEXT: store i32 [[I1]], i32* [[D1:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -138,8 +140,9 @@ define void @negative_test4(i32 %v0, i32 %v1, i1 %c, i32* %d0, i32* %d1) { ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[V0:%.*]], [[B0]] ], [ [[V1:%.*]], [[B1]] ] +; CHECK-NEXT: [[I1:%.*]] = phi i32 [ [[V0]], [[B0]] ], [ [[V1]], [[B1]] ] ; CHECK-NEXT: store i32 [[I0]], i32* [[D0:%.*]], align 4 -; CHECK-NEXT: store i32 [[I0]], i32* [[D1:%.*]], align 4 +; CHECK-NEXT: store i32 [[I1]], i32* [[D1:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -230,8 +233,9 @@ define void @test7(i32 %v0, i32 %v1, i16 %v2, i16 %v3, i1 %c, i32* %d0, i32* %d1 ; CHECK: end: ; CHECK-NEXT: [[IBAD:%.*]] = phi i16 [ [[V2:%.*]], [[B0]] ], [ [[V3:%.*]], [[B1]] ] ; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[V0:%.*]], [[B0]] ], [ [[V1:%.*]], [[B1]] ] +; CHECK-NEXT: [[I1:%.*]] = phi i32 [ [[V0]], [[B0]] ], [ [[V1]], [[B1]] ] ; CHECK-NEXT: store i32 [[I0]], i32* [[D0:%.*]], align 4 -; CHECK-NEXT: store i32 [[I0]], i32* [[D1:%.*]], align 4 +; CHECK-NEXT: store i32 [[I1]], i32* [[D1:%.*]], align 4 ; CHECK-NEXT: store i16 [[IBAD]], i16* [[D2:%.*]], align 2 ; CHECK-NEXT: ret void ; @@ -264,8 +268,9 @@ define void @test8(i32 %v0, i32 %v1, i16 %v2, i16 %v3, i1 %c, i32* %d0, i32* %d1 ; CHECK: end: ; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[V0:%.*]], [[B0]] ], [ [[V1:%.*]], [[B1]] ] ; CHECK-NEXT: [[IBAD:%.*]] = phi i16 [ [[V2:%.*]], [[B0]] ], [ [[V3:%.*]], [[B1]] ] +; CHECK-NEXT: [[I1:%.*]] = phi i32 [ [[V0]], [[B0]] ], [ [[V1]], [[B1]] ] ; CHECK-NEXT: store i32 [[I0]], i32* [[D0:%.*]], align 4 -; CHECK-NEXT: store i32 [[I0]], i32* [[D1:%.*]], align 4 +; CHECK-NEXT: store i32 [[I1]], i32* [[D1:%.*]], align 4 ; CHECK-NEXT: store i16 [[IBAD]], i16* [[D2:%.*]], align 2 ; CHECK-NEXT: ret void ; @@ -297,9 +302,10 @@ define void @test9(i32 %v0, i32 %v1, i16 %v2, i16 %v3, i1 %c, i32* %d0, i32* %d1 ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: [[I0:%.*]] = phi i32 [ [[V0:%.*]], [[B0]] ], [ [[V1:%.*]], [[B1]] ] +; CHECK-NEXT: [[I1:%.*]] = phi i32 [ [[V0]], [[B0]] ], [ [[V1]], [[B1]] ] ; CHECK-NEXT: [[IBAD:%.*]] = phi i16 [ [[V2:%.*]], [[B0]] ], [ [[V3:%.*]], [[B1]] ] ; CHECK-NEXT: store i32 [[I0]], i32* [[D0:%.*]], align 4 -; CHECK-NEXT: store i32 [[I0]], i32* [[D1:%.*]], align 4 +; CHECK-NEXT: store i32 [[I1]], i32* [[D1:%.*]], align 4 ; CHECK-NEXT: store i16 [[IBAD]], i16* [[D2:%.*]], align 2 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll b/llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll index 4e37dfc..db5402b 100644 --- a/llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll +++ b/llvm/test/Transforms/InstCombine/phi-equal-incoming-pointers.ll @@ -15,13 +15,17 @@ define i32 @test_gep_and_bitcast(i1 %cond, i1 %cond2) { ; ALL-NEXT: [[OBJ:%.*]] = call i8* @get_ptr.i8() ; ALL-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; ALL: bb1: +; ALL-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 +; ALL-NEXT: [[PTR1_TYPED:%.*]] = bitcast i8* [[PTR1]] to i32* ; ALL-NEXT: br label [[EXIT:%.*]] ; ALL: bb2: +; ALL-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 +; ALL-NEXT: [[PTR2_TYPED:%.*]] = bitcast i8* [[PTR2]] to i32* ; ALL-NEXT: br label [[EXIT]] ; ALL: exit: -; ALL-NEXT: [[PTR_TYPED_IN:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 -; ALL-NEXT: [[PTR_TYPED:%.*]] = bitcast i8* [[PTR_TYPED_IN]] to i32* -; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 +; ALL-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] +; ALL-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] +; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 ; ALL-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 ; ALL-NEXT: ret i32 [[RES]] @@ -56,13 +60,17 @@ define i32 @test_gep_and_bitcast_arg(i8* %obj, i1 %cond, i1 %cond2) { ; ALL-NEXT: entry: ; ALL-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; ALL: bb1: +; ALL-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ:%.*]], i64 16 +; ALL-NEXT: [[PTR1_TYPED:%.*]] = bitcast i8* [[PTR1]] to i32* ; ALL-NEXT: br label [[EXIT:%.*]] ; ALL: bb2: +; ALL-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 +; ALL-NEXT: [[PTR2_TYPED:%.*]] = bitcast i8* [[PTR2]] to i32* ; ALL-NEXT: br label [[EXIT]] ; ALL: exit: -; ALL-NEXT: [[PTR_TYPED_IN:%.*]] = getelementptr inbounds i8, i8* [[OBJ:%.*]], i64 16 -; ALL-NEXT: [[PTR_TYPED:%.*]] = bitcast i8* [[PTR_TYPED_IN]] to i32* -; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 +; ALL-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] +; ALL-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] +; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 ; ALL-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 ; ALL-NEXT: ret i32 [[RES]] @@ -108,13 +116,17 @@ define i32 @test_gep_and_bitcast_phi(i1 %cond, i1 %cond2, i1 %cond3) { ; ALL-NEXT: call void @foo.i8(i8* [[ANOTHER_PHI]]) ; ALL-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]] ; ALL: bb3: +; ALL-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 +; ALL-NEXT: [[PTR1_TYPED:%.*]] = bitcast i8* [[PTR1]] to i32* ; ALL-NEXT: br label [[EXIT:%.*]] ; ALL: bb4: +; ALL-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 +; ALL-NEXT: [[PTR2_TYPED:%.*]] = bitcast i8* [[PTR2]] to i32* ; ALL-NEXT: br label [[EXIT]] ; ALL: exit: -; ALL-NEXT: [[PTR_TYPED_IN:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 -; ALL-NEXT: [[PTR_TYPED:%.*]] = bitcast i8* [[PTR_TYPED_IN]] to i32* -; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 +; ALL-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB3]] ], [ [[PTR2_TYPED]], [[BB4]] ] +; ALL-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB3]] ], [ [[PTR2_TYPED]], [[BB4]] ] +; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 ; ALL-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: [[RES:%.*]] = select i1 [[COND3:%.*]], i32 [[RES_PHI]], i32 1 ; ALL-NEXT: ret i32 [[RES]] @@ -164,12 +176,15 @@ define i32 @test_gep_i32ptr(i1 %cond, i1 %cond2) { ; ALL-NEXT: [[OBJ:%.*]] = call i32* @get_ptr.i32() ; ALL-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; ALL: bb1: +; ALL-NEXT: [[PTR1_TYPED:%.*]] = getelementptr inbounds i32, i32* [[OBJ]], i64 16 ; ALL-NEXT: br label [[EXIT:%.*]] ; ALL: bb2: +; ALL-NEXT: [[PTR2_TYPED:%.*]] = getelementptr inbounds i32, i32* [[OBJ]], i64 16 ; ALL-NEXT: br label [[EXIT]] ; ALL: exit: -; ALL-NEXT: [[PTR_TYPED:%.*]] = getelementptr inbounds i32, i32* [[OBJ]], i64 16 -; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 +; ALL-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] +; ALL-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] +; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 ; ALL-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 ; ALL-NEXT: ret i32 [[RES]] @@ -203,13 +218,17 @@ define i32 @test_gep_and_bitcast_gep_base_ptr(i1 %cond, i1 %cond2) { ; ALL-NEXT: [[OBJ0:%.*]] = call i8* @get_ptr.i8() ; ALL-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; ALL: bb1: +; ALL-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ0]], i64 32 +; ALL-NEXT: [[PTR1_TYPED:%.*]] = bitcast i8* [[PTR1]] to i32* ; ALL-NEXT: br label [[EXIT:%.*]] ; ALL: bb2: +; ALL-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[OBJ0]], i64 32 +; ALL-NEXT: [[PTR2_TYPED:%.*]] = bitcast i8* [[PTR2]] to i32* ; ALL-NEXT: br label [[EXIT]] ; ALL: exit: -; ALL-NEXT: [[PTR_TYPED_IN:%.*]] = getelementptr inbounds i8, i8* [[OBJ0]], i64 32 -; ALL-NEXT: [[PTR_TYPED:%.*]] = bitcast i8* [[PTR_TYPED_IN]] to i32* -; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 +; ALL-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] +; ALL-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[BB1]] ], [ [[PTR2_TYPED]], [[BB2]] ] +; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 ; ALL-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 ; ALL-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 ; ALL-NEXT: ret i32 [[RES]] @@ -241,19 +260,37 @@ exit: } define i32 @test_gep_and_bitcast_same_bb(i1 %cond, i1 %cond2) { -; ALL-LABEL: @test_gep_and_bitcast_same_bb( -; ALL-NEXT: entry: -; ALL-NEXT: [[OBJ:%.*]] = call i8* @get_ptr.i8() -; ALL-NEXT: br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[BB2:%.*]] -; ALL: bb2: -; ALL-NEXT: br label [[EXIT]] -; ALL: exit: -; ALL-NEXT: [[PTR_TYPED_IN:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 -; ALL-NEXT: [[PTR_TYPED:%.*]] = bitcast i8* [[PTR_TYPED_IN]] to i32* -; ALL-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 -; ALL-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 -; ALL-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 -; ALL-NEXT: ret i32 [[RES]] +; INSTCOMBINE-LABEL: @test_gep_and_bitcast_same_bb( +; INSTCOMBINE-NEXT: entry: +; INSTCOMBINE-NEXT: [[OBJ:%.*]] = call i8* @get_ptr.i8() +; INSTCOMBINE-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 +; INSTCOMBINE-NEXT: [[PTR1_TYPED:%.*]] = bitcast i8* [[PTR1]] to i32* +; INSTCOMBINE-NEXT: br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[BB2:%.*]] +; INSTCOMBINE: bb2: +; INSTCOMBINE-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 +; INSTCOMBINE-NEXT: [[PTR2_TYPED:%.*]] = bitcast i8* [[PTR2]] to i32* +; INSTCOMBINE-NEXT: br label [[EXIT]] +; INSTCOMBINE: exit: +; INSTCOMBINE-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[ENTRY:%.*]] ], [ [[PTR2_TYPED]], [[BB2]] ] +; INSTCOMBINE-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[ENTRY]] ], [ [[PTR2_TYPED]], [[BB2]] ] +; INSTCOMBINE-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 +; INSTCOMBINE-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 +; INSTCOMBINE-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 +; INSTCOMBINE-NEXT: ret i32 [[RES]] +; +; INSTCOMBINEGVN-LABEL: @test_gep_and_bitcast_same_bb( +; INSTCOMBINEGVN-NEXT: entry: +; INSTCOMBINEGVN-NEXT: [[OBJ:%.*]] = call i8* @get_ptr.i8() +; INSTCOMBINEGVN-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 +; INSTCOMBINEGVN-NEXT: [[PTR1_TYPED:%.*]] = bitcast i8* [[PTR1]] to i32* +; INSTCOMBINEGVN-NEXT: br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[BB2:%.*]] +; INSTCOMBINEGVN: bb2: +; INSTCOMBINEGVN-NEXT: br label [[EXIT]] +; INSTCOMBINEGVN: exit: +; INSTCOMBINEGVN-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR1_TYPED]], align 4 +; INSTCOMBINEGVN-NEXT: store i32 1, i32* [[PTR1_TYPED]], align 4 +; INSTCOMBINEGVN-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 +; INSTCOMBINEGVN-NEXT: ret i32 [[RES]] ; entry: %obj = call i8* @get_ptr.i8() @@ -291,7 +328,8 @@ define i32 @test_gep_and_bitcast_same_bb_and_extra_use(i1 %cond, i1 %cond2) { ; INSTCOMBINE-NEXT: br label [[EXIT]] ; INSTCOMBINE: exit: ; INSTCOMBINE-NEXT: [[PTR_TYPED:%.*]] = phi i32* [ [[PTR1_TYPED]], [[ENTRY:%.*]] ], [ [[PTR2_TYPED]], [[BB2]] ] -; INSTCOMBINE-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[PTR_TYPED]], align 4 +; INSTCOMBINE-NEXT: [[RES_PHI_IN:%.*]] = phi i32* [ [[PTR1_TYPED]], [[ENTRY]] ], [ [[PTR2_TYPED]], [[BB2]] ] +; INSTCOMBINE-NEXT: [[RES_PHI:%.*]] = load i32, i32* [[RES_PHI_IN]], align 4 ; INSTCOMBINE-NEXT: store i32 1, i32* [[PTR_TYPED]], align 4 ; INSTCOMBINE-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i32 [[RES_PHI]], i32 1 ; INSTCOMBINE-NEXT: ret i32 [[RES]] @@ -340,12 +378,15 @@ define i8 @test_gep(i1 %cond, i1 %cond2) { ; ALL-NEXT: [[OBJ:%.*]] = call i8* @get_ptr.i8() ; ALL-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; ALL: bb1: +; ALL-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 ; ALL-NEXT: br label [[EXIT:%.*]] ; ALL: bb2: +; ALL-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 ; ALL-NEXT: br label [[EXIT]] ; ALL: exit: -; ALL-NEXT: [[PTR_TYPED:%.*]] = getelementptr inbounds i8, i8* [[OBJ]], i64 16 -; ALL-NEXT: [[RES_PHI:%.*]] = load i8, i8* [[PTR_TYPED]], align 1 +; ALL-NEXT: [[PTR_TYPED:%.*]] = phi i8* [ [[PTR1]], [[BB1]] ], [ [[PTR2]], [[BB2]] ] +; ALL-NEXT: [[RES_PHI_IN:%.*]] = phi i8* [ [[PTR1]], [[BB1]] ], [ [[PTR2]], [[BB2]] ] +; ALL-NEXT: [[RES_PHI:%.*]] = load i8, i8* [[RES_PHI_IN]], align 1 ; ALL-NEXT: store i8 1, i8* [[PTR_TYPED]], align 1 ; ALL-NEXT: [[RES:%.*]] = select i1 [[COND2:%.*]], i8 [[RES_PHI]], i8 1 ; ALL-NEXT: ret i8 [[RES]] diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 0ac9c69..73e1e4af 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -1903,6 +1903,7 @@ exit: } ; Shows how we can leverage dominance to eliminate duplicating selects. +; TODO: We can optimize further if we eliminate duplicating Phis and similify the whole thing to phi[x, y] * 3. define i32 @select_dominance_chain(i1 %cond, i32 %x, i32 %y) { ; CHECK-LABEL: @select_dominance_chain( ; CHECK-NEXT: entry: @@ -1925,7 +1926,10 @@ define i32 @select_dominance_chain(i1 %cond, i32 %x, i32 %y) { ; CHECK-NEXT: br label [[MERGE_3]] ; CHECK: merge.3: ; CHECK-NEXT: [[S_3:%.*]] = phi i32 [ [[Y:%.*]], [[IF_FALSE_3]] ], [ [[X:%.*]], [[IF_TRUE_3]] ] -; CHECK-NEXT: [[SUM_2:%.*]] = mul i32 [[S_3]], 3 +; CHECK-NEXT: [[S_2:%.*]] = phi i32 [ [[Y]], [[IF_FALSE_3]] ], [ [[X]], [[IF_TRUE_3]] ] +; CHECK-NEXT: [[S_1:%.*]] = phi i32 [ [[Y]], [[IF_FALSE_3]] ], [ [[X]], [[IF_TRUE_3]] ] +; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[S_1]], [[S_2]] +; CHECK-NEXT: [[SUM_2:%.*]] = add i32 [[SUM_1]], [[S_3]] ; CHECK-NEXT: ret i32 [[SUM_2]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index 33bd3d1..4599899 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -504,6 +504,7 @@ exit: ;CHECK: add <4 x i32> ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0 ;CHECK: %sum.lcssa = phi i32 [ %[[SCALAR:.*]], %.lr.ph ], [ %[[VECTOR:.*]], %middle.block ] +;CHECK: %sum.copy = phi i32 [ %[[SCALAR]], %.lr.ph ], [ %[[VECTOR]], %middle.block ] ;CHECK: ret i32 define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) { %1 = icmp sgt i32 %n, 0 @@ -543,7 +544,7 @@ end: ; variable. We cannot vectorize this. ; CHECK-LABEL: reduction_reset( ; CHECK-NOT: <4 x i32> -define void @reduction_reset(i32 %N, i32* nocapture readonly %arrayA, i32* nocapture %arrayB) { +define void @reduction_reset(i32 %N, i32* nocapture readonly %arrayA, i32* nocapture %arrayB) { entry: %c4 = icmp sgt i32 %N, 0 br i1 %c4, label %.lr.ph.preheader, label %._crit_edge -- 2.7.4