From 01f999ae8871544ab4996fd1368c0dfe4c4a0765 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 29 May 2020 09:29:39 +0100 Subject: [PATCH] [SCCP] Switch to widen at PHIs, stores and call edges. Currently SCCP does not widen PHIs, stores or along call edges (arguments/return values), but on operations that directly extend ranges (like binary operators). This means PHIs, stores and call edges are not pessimized by widening currently, while binary operators are. The main reason for widening operators initially was that opting-out for certain operations was more straight-forward in the initial implementation (and it did not matter too much, as range support initially was only implemented for a very limited set of operations. During the discussion in D78391, it was suggested to consider flipping widening to PHIs, stores and along call edges. After adding support for tracking the number of range extensions in ValueLattice, limiting the number of range extensions per value is straight forward. This patch introduces a MaxWidenSteps option to the MergeOptions, limiting the number of range extensions per value. For PHIs, it seems natural allow an extension for each (active) incoming value plus 1. For the other cases, a arbitrary limit of 10 has been chosen initially. It would potentially make sense to set it depending on the users of a function/global, but that still needs investigating. This potentially leads to more state-changes and longer compile-times. The results look quite promising (MultiSource, SPEC): Same hash: 179 (filtered out) Remaining: 58 Metric: sccp.IPNumInstRemoved Program base widen-phi diff test-suite...ks/Prolangs-C/agrep/agrep.test 58.00 82.00 41.4% test-suite...marks/SciMark2-C/scimark2.test 32.00 43.00 34.4% test-suite...rks/FreeBench/mason/mason.test 6.00 8.00 33.3% test-suite...langs-C/football/football.test 104.00 128.00 23.1% test-suite...cations/hexxagon/hexxagon.test 36.00 42.00 16.7% test-suite...CFP2000/177.mesa/177.mesa.test 214.00 249.00 16.4% test-suite...ngs-C/assembler/assembler.test 14.00 16.00 14.3% test-suite...arks/VersaBench/dbms/dbms.test 10.00 11.00 10.0% test-suite...oxyApps-C++/miniFE/miniFE.test 43.00 47.00 9.3% test-suite...ications/JM/ldecod/ldecod.test 179.00 195.00 8.9% test-suite...CFP2006/433.milc/433.milc.test 249.00 265.00 6.4% test-suite.../CINT2000/175.vpr/175.vpr.test 98.00 104.00 6.1% test-suite...peg2/mpeg2dec/mpeg2decode.test 70.00 74.00 5.7% test-suite...CFP2000/188.ammp/188.ammp.test 71.00 75.00 5.6% test-suite...ce/Benchmarks/PAQ8p/paq8p.test 111.00 117.00 5.4% test-suite...ce/Applications/Burg/burg.test 41.00 43.00 4.9% test-suite...000/197.parser/197.parser.test 66.00 69.00 4.5% test-suite...tions/lambda-0.1.3/lambda.test 23.00 24.00 4.3% test-suite...urce/Applications/lua/lua.test 301.00 313.00 4.0% test-suite...TimberWolfMC/timberwolfmc.test 76.00 79.00 3.9% test-suite...lications/ClamAV/clamscan.test 991.00 1030.00 3.9% test-suite...plications/d/make_dparser.test 53.00 55.00 3.8% test-suite...fice-ispell/office-ispell.test 83.00 86.00 3.6% test-suite...lications/obsequi/Obsequi.test 28.00 29.00 3.6% test-suite.../Prolangs-C/bison/mybison.test 56.00 58.00 3.6% test-suite.../CINT2000/254.gap/254.gap.test 170.00 176.00 3.5% test-suite.../Applications/lemon/lemon.test 30.00 31.00 3.3% test-suite.../CINT2000/176.gcc/176.gcc.test 1202.00 1240.00 3.2% test-suite...pplications/treecc/treecc.test 79.00 81.00 2.5% test-suite...chmarks/MallocBench/gs/gs.test 357.00 366.00 2.5% test-suite...eeBench/analyzer/analyzer.test 103.00 105.00 1.9% test-suite...T2006/445.gobmk/445.gobmk.test 1697.00 1724.00 1.6% test-suite...006/453.povray/453.povray.test 1812.00 1839.00 1.5% test-suite.../Benchmarks/Bullet/bullet.test 337.00 342.00 1.5% test-suite.../CINT2000/252.eon/252.eon.test 426.00 432.00 1.4% test-suite...T2000/300.twolf/300.twolf.test 214.00 217.00 1.4% test-suite...pplications/oggenc/oggenc.test 244.00 247.00 1.2% test-suite.../CINT2006/403.gcc/403.gcc.test 4008.00 4055.00 1.2% test-suite...T2006/456.hmmer/456.hmmer.test 175.00 177.00 1.1% test-suite...nal/skidmarks10/skidmarks.test 430.00 434.00 0.9% test-suite.../Applications/sgefa/sgefa.test 115.00 116.00 0.9% test-suite...006/447.dealII/447.dealII.test 1082.00 1091.00 0.8% test-suite...6/482.sphinx3/482.sphinx3.test 141.00 142.00 0.7% test-suite...ocBench/espresso/espresso.test 152.00 153.00 0.7% test-suite...3.xalancbmk/483.xalancbmk.test 4003.00 4025.00 0.5% test-suite...lications/sqlite3/sqlite3.test 548.00 551.00 0.5% test-suite...marks/7zip/7zip-benchmark.test 5522.00 5551.00 0.5% test-suite...nsumer-lame/consumer-lame.test 208.00 209.00 0.5% test-suite...:: External/Povray/povray.test 1556.00 1563.00 0.4% test-suite...000/186.crafty/186.crafty.test 298.00 299.00 0.3% test-suite.../Applications/SPASS/SPASS.test 2019.00 2025.00 0.3% test-suite...ications/JM/lencod/lencod.test 8427.00 8449.00 0.3% test-suite...6/464.h264ref/464.h264ref.test 6797.00 6813.00 0.2% test-suite...6/471.omnetpp/471.omnetpp.test 431.00 430.00 -0.2% test-suite...006/450.soplex/450.soplex.test 446.00 447.00 0.2% test-suite...0.perlbench/400.perlbench.test 1729.00 1727.00 -0.1% test-suite...000/255.vortex/255.vortex.test 3815.00 3819.00 0.1% Reviewers: efriedma, nikic, davide Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D79036 --- llvm/include/llvm/Analysis/ValueLattice.h | 21 +- llvm/lib/Transforms/Scalar/SCCP.cpp | 57 +++-- llvm/test/Transforms/SCCP/constant-range-struct.ll | 24 +- llvm/test/Transforms/SCCP/ipsccp-cycles.ll | 242 +++++++++++++++++++++ .../Transforms/SCCP/resolvedundefsin-tracked-fn.ll | 5 +- llvm/test/Transforms/SCCP/widening.ll | 130 ++++++++--- 6 files changed, 410 insertions(+), 69 deletions(-) create mode 100644 llvm/test/Transforms/SCCP/ipsccp-cycles.ll diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h index 718c264..00a230f 100644 --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -113,10 +113,16 @@ public: /// number of steps. bool CheckWiden; + /// The number of allowed widening steps (including setting the range + /// initially). + unsigned MaxWidenSteps; + MergeOptions() : MergeOptions(false, false) {} - MergeOptions(bool MayIncludeUndef, bool CheckWiden) - : MayIncludeUndef(MayIncludeUndef), CheckWiden(CheckWiden) {} + MergeOptions(bool MayIncludeUndef, bool CheckWiden, + unsigned MaxWidenSteps = 1) + : MayIncludeUndef(MayIncludeUndef), CheckWiden(CheckWiden), + MaxWidenSteps(MaxWidenSteps) {} MergeOptions &setMayIncludeUndef(bool V = true) { MayIncludeUndef = V; @@ -127,6 +133,12 @@ public: CheckWiden = V; return *this; } + + MergeOptions &setMaxWidenSteps(unsigned Steps = 1) { + CheckWiden = true; + MaxWidenSteps = Steps; + return *this; + } }; // ConstVal and Range are initialized on-demand. @@ -349,7 +361,7 @@ public: // Simple form of widening. If a range is extended multiple times, go to // overdefined. - if (Opts.CheckWiden && ++NumRangeExtensions == 1) + if (Opts.CheckWiden && ++NumRangeExtensions > Opts.MaxWidenSteps) return markOverdefined(); assert(NewR.contains(getConstantRange()) && @@ -458,6 +470,9 @@ public: return nullptr; } + + unsigned getNumRangeExtensions() const { return NumRangeExtensions; } + void setNumRangeExtensions(unsigned N) { NumRangeExtensions = N; } }; static_assert(sizeof(ValueLatticeElement) <= 40, diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index e6477e3..6eb83af 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -72,6 +72,15 @@ STATISTIC(IPNumInstRemoved, "Number of instructions removed by IPSCCP"); STATISTIC(IPNumArgsElimed ,"Number of arguments constant propagated by IPSCCP"); STATISTIC(IPNumGlobalConst, "Number of globals found to be constant by IPSCCP"); +// The maximum number of range extensions allowed for operations requiring +// widening. +static const unsigned MaxNumRangeExtensions = 10; + +/// Returns MergeOptions with MaxWidenSteps set to MaxNumRangeExtensions. +static ValueLatticeElement::MergeOptions getMaxWidenStepsOpts() { + return ValueLatticeElement::MergeOptions().setMaxWidenSteps( + MaxNumRangeExtensions); +} namespace { // Helper to check if \p LV is either a constant or a constant @@ -401,7 +410,7 @@ private: bool mergeInValue(ValueLatticeElement &IV, Value *V, ValueLatticeElement MergeWithV, ValueLatticeElement::MergeOptions Opts = { - /*MayIncludeUndef=*/false, /*CheckWiden=*/true}) { + /*MayIncludeUndef=*/false, /*CheckWiden=*/false}) { if (IV.mergeIn(MergeWithV, Opts)) { pushToWorkList(IV, V); LLVM_DEBUG(dbgs() << "Merged " << MergeWithV << " into " << *V << " : " @@ -413,7 +422,7 @@ private: bool mergeInValue(Value *V, ValueLatticeElement MergeWithV, ValueLatticeElement::MergeOptions Opts = { - /*MayIncludeUndef=*/false, /*CheckWiden=*/true}) { + /*MayIncludeUndef=*/false, /*CheckWiden=*/false}) { assert(!V->getType()->isStructTy() && "non-structs should use markConstant"); return mergeInValue(ValueState[V], V, MergeWithV, Opts); @@ -725,24 +734,36 @@ void SCCPSolver::visitPHINode(PHINode &PN) { if (PN.getNumIncomingValues() > 64) return (void)markOverdefined(&PN); + unsigned NumActiveIncoming = 0; + // Look at all of the executable operands of the PHI node. If any of them // are overdefined, the PHI becomes overdefined as well. If they are all // constant, and they agree with each other, the PHI becomes the identical - // constant. If they are constant and don't agree, the PHI is overdefined. - // If there are no executable operands, the PHI remains unknown. - bool Changed = false; + // constant. If they are constant and don't agree, the PHI is a constant + // range. If there are no executable operands, the PHI remains unknown. + ValueLatticeElement PhiState = getValueState(&PN); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - ValueLatticeElement IV = getValueState(PN.getIncomingValue(i)); if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent())) continue; - ValueLatticeElement &Res = getValueState(&PN); - Changed |= Res.mergeIn(IV); - if (Res.isOverdefined()) + ValueLatticeElement IV = getValueState(PN.getIncomingValue(i)); + PhiState.mergeIn(IV); + NumActiveIncoming++; + if (PhiState.isOverdefined()) break; } - if (Changed) - pushToWorkListMsg(ValueState[&PN], &PN); + + // We allow up to 1 range extension per active incoming value and one + // additional extension. Note that we manually adjust the number of range + // extensions to match the number of active incoming values. This helps to + // limit multiple extensions caused by the same incoming value, if other + // incoming values are equal. + mergeInValue(&PN, PhiState, + ValueLatticeElement::MergeOptions().setMaxWidenSteps( + NumActiveIncoming + 1)); + ValueLatticeElement &PhiStateRef = getValueState(&PN); + PhiStateRef.setNumRangeExtensions( + std::max(NumActiveIncoming, PhiStateRef.getNumRangeExtensions())); } void SCCPSolver::visitReturnInst(ReturnInst &I) { @@ -1112,8 +1133,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) { // If we are tracking this global, merge in the known value for it. auto It = TrackedGlobals.find(GV); if (It != TrackedGlobals.end()) { - mergeInValue(IV, &I, It->second, - ValueLatticeElement::MergeOptions().setCheckWiden(false)); + mergeInValue(IV, &I, It->second, getMaxWidenStepsOpts()); return; } } @@ -1201,11 +1221,11 @@ void SCCPSolver::handleCallArguments(CallBase &CB) { if (auto *STy = dyn_cast(AI->getType())) { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { ValueLatticeElement CallArg = getStructValueState(*CAI, i); - mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg); + mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg, + getMaxWidenStepsOpts()); } } else - mergeInValue(&*AI, getValueState(*CAI), - ValueLatticeElement::MergeOptions().setCheckWiden(false)); + mergeInValue(&*AI, getValueState(*CAI), getMaxWidenStepsOpts()); } } } @@ -1316,14 +1336,15 @@ void SCCPSolver::handleCallResult(CallBase &CB) { // into this call site. for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) mergeInValue(getStructValueState(&CB, i), &CB, - TrackedMultipleRetVals[std::make_pair(F, i)]); + TrackedMultipleRetVals[std::make_pair(F, i)], + getMaxWidenStepsOpts()); } else { auto TFRVI = TrackedRetVals.find(F); if (TFRVI == TrackedRetVals.end()) return handleCallOverdefined(CB); // Not tracking this callee. // If so, propagate the return value of the callee into this call result. - mergeInValue(&CB, TFRVI->second); + mergeInValue(&CB, TFRVI->second, getMaxWidenStepsOpts()); } } diff --git a/llvm/test/Transforms/SCCP/constant-range-struct.ll b/llvm/test/Transforms/SCCP/constant-range-struct.ll index 6a602fe..290437d 100644 --- a/llvm/test/Transforms/SCCP/constant-range-struct.ll +++ b/llvm/test/Transforms/SCCP/constant-range-struct.ll @@ -102,22 +102,14 @@ define void @struct2_caller() { ; CHECK-NEXT: [[S:%.*]] = call { i64, i64 } @struct2() ; CHECK-NEXT: [[V1:%.*]] = extractvalue { i64, i64 } [[S]], 0 ; CHECK-NEXT: [[V2:%.*]] = extractvalue { i64, i64 } [[S]], 1 -; CHECK-NEXT: [[T_1:%.*]] = icmp ne i64 [[V1]], 10 -; CHECK-NEXT: call void @use(i1 [[T_1]]) -; CHECK-NEXT: [[T_2:%.*]] = icmp ult i64 [[V1]], 100 -; CHECK-NEXT: call void @use(i1 [[T_2]]) -; CHECK-NEXT: [[T_3:%.*]] = icmp ne i64 [[V2]], 0 -; CHECK-NEXT: call void @use(i1 [[T_3]]) -; CHECK-NEXT: [[T_4:%.*]] = icmp ult i64 [[V2]], 301 -; CHECK-NEXT: call void @use(i1 [[T_4]]) -; CHECK-NEXT: [[F_1:%.*]] = icmp eq i64 [[V1]], 10 -; CHECK-NEXT: call void @use(i1 [[F_1]]) -; CHECK-NEXT: [[F_2:%.*]] = icmp ult i64 [[V1]], 19 -; CHECK-NEXT: call void @use(i1 [[F_2]]) -; CHECK-NEXT: [[F_3:%.*]] = icmp eq i64 [[V2]], 50 -; CHECK-NEXT: call void @use(i1 [[F_3]]) -; CHECK-NEXT: [[F_4:%.*]] = icmp ugt i64 [[V2]], 301 -; CHECK-NEXT: call void @use(i1 [[F_4]]) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: call void @use(i1 false) ; CHECK-NEXT: [[C_1:%.*]] = icmp eq i64 [[V1]], 25 ; CHECK-NEXT: call void @use(i1 [[C_1]]) ; CHECK-NEXT: [[C_2:%.*]] = icmp ult i64 [[V1]], 25 diff --git a/llvm/test/Transforms/SCCP/ipsccp-cycles.ll b/llvm/test/Transforms/SCCP/ipsccp-cycles.ll new file mode 100644 index 0000000..e4b81fb --- /dev/null +++ b/llvm/test/Transforms/SCCP/ipsccp-cycles.ll @@ -0,0 +1,242 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -ipsccp -S | FileCheck %s + +define internal i32 @test1a(i32 %A, i32 %b) { +; CHECK-LABEL: @test1a( +; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 1 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], [[B:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[BB_TRUE:%.*]], label [[BB_FALSE:%.*]] +; CHECK: bb.true: +; CHECK-NEXT: [[R:%.*]] = call i32 @test1a(i32 [[X]], i32 [[B]]) +; CHECK-NEXT: ret i32 [[R]] +; CHECK: bb.false: +; CHECK-NEXT: ret i32 [[A]] +; + %X = add i32 %A, 1 + %c = icmp eq i32 %X, %b + br i1 %c, label %bb.true, label %bb.false + +bb.true: + %r = call i32 @test1a(i32 %X, i32 %b) + ret i32 %r + +bb.false: + ret i32 %A +} + +define i32 @test1b(i32 %b) { +; CHECK-LABEL: @test1b( +; CHECK-NEXT: [[X:%.*]] = call i32 @test1a(i32 17, i32 [[B:%.*]]) +; CHECK-NEXT: ret i32 [[X]] +; + %X = call i32 @test1a( i32 17, i32 %b) + ret i32 %X +} + +@Getopt.optind = internal global i32 1, align 4 + +define i32 @test2(i32 %a) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* @Getopt.optind, align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LV]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* @Getopt.optind, align 4 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[ADD]], [[A:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i32 [[ADD]] +; +entry: + br label %loop + +loop: + %lv = load i32, i32* @Getopt.optind, align 4 + %add = add i32 %lv, 1 + store i32 %add, i32* @Getopt.optind + %c = icmp eq i32 %add, %a + br i1 %c, label %exit, label %loop + +exit: + ret i32 %add +} + + +define internal i32 @test3a(i32 %a) { +; CHECK-LABEL: @test3a( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RES:%.*]] = add i32 [[A:%.*]], 1 +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[RES]], 1000 +; CHECK-NEXT: br i1 [[C]], label [[BB_TRUE:%.*]], label [[BB_FALSE:%.*]] +; CHECK: bb.true: +; CHECK-NEXT: ret i32 [[RES]] +; CHECK: bb.false: +; CHECK-NEXT: ret i32 0 +; +entry: + %res = add i32 %a, 1 + %c = icmp ult i32 %res, 1000 + br i1 %c, label %bb.true, label %bb.false + +bb.true: + ret i32 %res + +bb.false: + ret i32 0 +} + +define i32 @test3b(i32 %a) { +; CHECK-LABEL: @test3b( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V1:%.*]] = call i32 @test3a(i32 0) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[V2:%.*]] = call i32 @test3a(i32 [[V1]]) +; CHECK-NEXT: [[V3:%.*]] = add i32 [[V2]], 1 +; CHECK-NEXT: [[V4:%.*]] = call i32 @test3a(i32 [[V3]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[V4]], [[A:%.*]] +; CHECK-NEXT: br i1 [[C]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret i32 [[V4]] +; +entry: + %v1 = call i32 @test3a(i32 0) + br label %loop + +loop: + %v2 = call i32 @test3a(i32 %v1) + %v3 = add i32 %v2, 1 + %v4 = call i32 @test3a(i32 %v3) + %c = icmp eq i32 %v4, %a + br i1 %c, label %exit, label %loop + +exit: + ret i32 %v4 +} + +%struct.S = type { i32, i32 } + +; Check for a range extension cycle through a struct argument. +define internal i32 @test4a(%struct.S %s) { +; CHECK-LABEL: @test4a( +; CHECK-NEXT: [[A:%.*]] = extractvalue [[STRUCT_S:%.*]] %s, 0 +; CHECK-NEXT: [[B:%.*]] = extractvalue [[STRUCT_S]] %s, 1 +; CHECK-NEXT: [[X:%.*]] = add i32 [[A]], 1 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X]], [[B]] +; CHECK-NEXT: br i1 [[C]], label [[BB_TRUE:%.*]], label [[BB_FALSE:%.*]] +; CHECK: bb.true: +; CHECK-NEXT: [[S2:%.*]] = insertvalue [[STRUCT_S]] %s, i32 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = call i32 @test4a(%struct.S [[S2]]) +; CHECK-NEXT: ret i32 [[R]] +; CHECK: bb.false: +; CHECK-NEXT: ret i32 [[A]] +; + %a = extractvalue %struct.S %s, 0 + %b = extractvalue %struct.S %s, 1 + + %x = add i32 %a, 1 + %c = icmp eq i32 %x, %b + br i1 %c, label %bb.true, label %bb.false + +bb.true: + %s2 = insertvalue %struct.S %s, i32 %x, 0 + %r = call i32 @test4a(%struct.S %s2) + ret i32 %r + +bb.false: + ret i32 %a +} + +define i32 @test4b(i32 %b) { +; CHECK-LABEL: @test4b( +; CHECK-NEXT: [[S2:%.*]] = insertvalue [[STRUCT_S:%.*]] { i32 17, i32 undef }, i32 [[B:%.*]], 1 +; CHECK-NEXT: [[X:%.*]] = call i32 @test4a(%struct.S [[S2]]) +; CHECK-NEXT: ret i32 [[X]] +; + %s1 = insertvalue %struct.S undef, i32 17, 0 + %s2 = insertvalue %struct.S %s1, i32 %b, 1 + %X = call i32 @test4a(%struct.S %s2) + ret i32 %X +} + +; Check for a range extension cycle through a returned value. + +define internal i32 @test5a(i8* %arg, i32 %arg1, i32 %arg2) { +; CHECK-LABEL: @test5a( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = icmp eq i8* [[ARG:%.*]], null +; CHECK-NEXT: br i1 [[TMP]], label [[BB6:%.*]], label [[BB3:%.*]] +; CHECK: bb3: +; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @test5a(i8* [[ARG]], i32 0, i32 -1) +; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], -1 +; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK: bb6: +; CHECK-NEXT: ret i32 0 +; +bb: + %tmp = icmp eq i8* %arg, null + br i1 %tmp, label %bb6, label %bb3 + +bb3: ; preds = %bb + %tmp4 = tail call i32 @test5a(i8* %arg, i32 %arg1, i32 %arg2) + %tmp5 = add nsw i32 %tmp4, %arg2 + ret i32 %tmp5 + +bb6: ; preds = %bb + ret i32 %arg1 +} + +define void @test5b(i8* %ptr) { +; CHECK-LABEL: @test5b( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = tail call i32 @test5a(i8* [[PTR:%.*]], i32 0, i32 -1) +; CHECK-NEXT: ret void +; +bb: + %tmp = tail call i32 @test5a(i8* %ptr, i32 0, i32 -1) + ret void +} + +%struct = type { i32, i32 } + +define internal %struct @test6a(i8* %arg, i32 %arg1, i32 %arg2) { +; CHECK-LABEL: @test6a( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = icmp eq i8* [[ARG:%.*]], null +; CHECK-NEXT: br i1 [[TMP]], label [[BB6:%.*]], label [[BB3:%.*]] +; CHECK: bb3: +; CHECK-NEXT: [[S1:%.*]] = tail call [[STRUCT:%.*]] @test6a(i8* [[ARG]], i32 0, i32 -1) +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT]] %s1, 0 +; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], -1 +; CHECK-NEXT: [[S2:%.*]] = insertvalue [[STRUCT]] %s1, i32 [[TMP5]], 0 +; CHECK-NEXT: ret [[STRUCT]] %s2 +; CHECK: bb6: +; CHECK-NEXT: ret [[STRUCT]] { i32 0, i32 undef } +; +bb: + %tmp = icmp eq i8* %arg, null + br i1 %tmp, label %bb6, label %bb3 + +bb3: ; preds = %bb + %s1 = tail call %struct @test6a(i8* %arg, i32 %arg1, i32 %arg2) + %tmp4 = extractvalue %struct %s1, 0 + %tmp5 = add nsw i32 %tmp4, %arg2 + %s2 = insertvalue %struct %s1, i32 %tmp5, 0 + ret %struct %s2 + +bb6: ; preds = %bb + %s3 = insertvalue %struct undef, i32 %arg1, 0 + ret %struct %s3 +} + +define void @test6b(i8* %ptr) { +; CHECK-LABEL: @test6b( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = tail call [[STRUCT:%.*]] @test6a(i8* [[PTR:%.*]], i32 0, i32 -1) +; CHECK-NEXT: ret void +; +bb: + %tmp = tail call %struct @test6a(i8* %ptr, i32 0, i32 -1) + ret void +} diff --git a/llvm/test/Transforms/SCCP/resolvedundefsin-tracked-fn.ll b/llvm/test/Transforms/SCCP/resolvedundefsin-tracked-fn.ll index d56c910..e1c7b3d 100644 --- a/llvm/test/Transforms/SCCP/resolvedundefsin-tracked-fn.ll +++ b/llvm/test/Transforms/SCCP/resolvedundefsin-tracked-fn.ll @@ -386,10 +386,7 @@ define void @test3() { ; CHECK-NEXT: store i32 [[MUL]], i32* @pcount, align 4 ; CHECK-NEXT: ret void ; CHECK: if.end24: -; CHECK-NEXT: [[CMP25474:%.*]] = icmp sgt i32 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[CMP25474]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body: -; CHECK-NEXT: ret void +; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SCCP/widening.ll b/llvm/test/Transforms/SCCP/widening.ll index 9f5e1c7..8251758 100644 --- a/llvm/test/Transforms/SCCP/widening.ll +++ b/llvm/test/Transforms/SCCP/widening.ll @@ -17,10 +17,8 @@ define void @test_2_incoming_constants(i32 %x) { ; SCCP: exit: ; SCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ] ; SCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 -; SCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 -; SCCP-NEXT: call void @use(i1 [[T_1]]) -; SCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 -; SCCP-NEXT: call void @use(i1 [[F_1]]) +; SCCP-NEXT: call void @use(i1 true) +; SCCP-NEXT: call void @use(i1 false) ; SCCP-NEXT: ret void ; ; IPSCCP-LABEL: @test_2_incoming_constants( @@ -32,10 +30,8 @@ define void @test_2_incoming_constants(i32 %x) { ; IPSCCP: exit: ; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ] ; IPSCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 -; IPSCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) -; IPSCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 -; IPSCCP-NEXT: call void @use(i1 [[F_1]]) +; IPSCCP-NEXT: call void @use(i1 true) +; IPSCCP-NEXT: call void @use(i1 false) ; IPSCCP-NEXT: ret void ; entry: @@ -68,10 +64,8 @@ define void @test_3_incoming_constants(i32 %x) { ; SCCP: exit: ; SCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ] ; SCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 -; SCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 -; SCCP-NEXT: call void @use(i1 [[T_1]]) -; SCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 -; SCCP-NEXT: call void @use(i1 [[F_1]]) +; SCCP-NEXT: call void @use(i1 true) +; SCCP-NEXT: call void @use(i1 false) ; SCCP-NEXT: ret void ; ; IPSCCP-LABEL: @test_3_incoming_constants( @@ -86,10 +80,8 @@ define void @test_3_incoming_constants(i32 %x) { ; IPSCCP: exit: ; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ] ; IPSCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 -; IPSCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) -; IPSCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 -; IPSCCP-NEXT: call void @use(i1 [[F_1]]) +; IPSCCP-NEXT: call void @use(i1 true) +; IPSCCP-NEXT: call void @use(i1 false) ; IPSCCP-NEXT: ret void ; entry: @@ -132,10 +124,8 @@ define void @test_5_incoming_constants(i32 %x) { ; SCCP: exit: ; SCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ], [ 3, [[BB3]] ], [ 4, [[BB4]] ] ; SCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 -; SCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 -; SCCP-NEXT: call void @use(i1 [[T_1]]) -; SCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 -; SCCP-NEXT: call void @use(i1 [[F_1]]) +; SCCP-NEXT: call void @use(i1 true) +; SCCP-NEXT: call void @use(i1 false) ; SCCP-NEXT: ret void ; ; IPSCCP-LABEL: @test_5_incoming_constants( @@ -156,10 +146,8 @@ define void @test_5_incoming_constants(i32 %x) { ; IPSCCP: exit: ; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ], [ 3, [[BB3]] ], [ 4, [[BB4]] ] ; IPSCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 -; IPSCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) -; IPSCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 -; IPSCCP-NEXT: call void @use(i1 [[F_1]]) +; IPSCCP-NEXT: call void @use(i1 true) +; IPSCCP-NEXT: call void @use(i1 false) ; IPSCCP-NEXT: ret void ; entry: @@ -369,8 +357,7 @@ define void @loop_with_header_1(i32 %x) { ; IPSCCP-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 2 ; IPSCCP-NEXT: br i1 [[C_1]], label [[LOOP_BODY]], label [[EXIT:%.*]] ; IPSCCP: loop.body: -; IPSCCP-NEXT: [[T_1:%.*]] = icmp slt i32 [[IV]], 2 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: call void @use(i1 true) ; IPSCCP-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; IPSCCP-NEXT: br label [[LOOP_HEADER]] ; IPSCCP: exit: @@ -418,8 +405,7 @@ define void @loop_with_header_2(i32 %x) { ; IPSCCP-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 200 ; IPSCCP-NEXT: br i1 [[C_1]], label [[LOOP_BODY]], label [[EXIT:%.*]] ; IPSCCP: loop.body: -; IPSCCP-NEXT: [[T_1:%.*]] = icmp slt i32 [[IV]], 200 -; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: call void @use(i1 true) ; IPSCCP-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; IPSCCP-NEXT: br label [[LOOP_HEADER]] ; IPSCCP: exit: @@ -877,3 +863,91 @@ bb66: ; preds = %bb60, %bb35 %tmp67 = phi i8* [ %tmp36, %bb35 ], [ null, %bb60 ] ret i8* %tmp67 } + + +define i32 @loop_with_multiple_euqal_incomings(i32 %N) { +; SCCP-LABEL: @loop_with_multiple_euqal_incomings( +; SCCP-NEXT: entry: +; SCCP-NEXT: br label [[LOOP:%.*]] +; SCCP: loop: +; SCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[P_NEXT:%.*]], [[BB3:%.*]] ], [ 0, [[BB4:%.*]] ], [ 0, [[BB5:%.*]] ], [ 0, [[BB6:%.*]] ] +; SCCP-NEXT: [[C_1:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2:%.*]] +; SCCP: bb1: +; SCCP-NEXT: [[C_2:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_2]], label [[BB3]], label [[BB4]] +; SCCP: bb2: +; SCCP-NEXT: [[C_4:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_4]], label [[BB5]], label [[BB6]] +; SCCP: bb3: +; SCCP-NEXT: [[P_NEXT]] = add i32 [[P]], 1 +; SCCP-NEXT: br label [[LOOP]] +; SCCP: bb4: +; SCCP-NEXT: [[C_3:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_3]], label [[LOOP]], label [[END:%.*]] +; SCCP: bb5: +; SCCP-NEXT: br label [[LOOP]] +; SCCP: bb6: +; SCCP-NEXT: br label [[LOOP]] +; SCCP: end: +; SCCP-NEXT: ret i32 [[P]] +; +; IPSCCP-LABEL: @loop_with_multiple_euqal_incomings( +; IPSCCP-NEXT: entry: +; IPSCCP-NEXT: br label [[LOOP:%.*]] +; IPSCCP: loop: +; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[P_NEXT:%.*]], [[BB3:%.*]] ], [ 0, [[BB4:%.*]] ], [ 0, [[BB5:%.*]] ], [ 0, [[BB6:%.*]] ] +; IPSCCP-NEXT: [[C_1:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[BB2:%.*]] +; IPSCCP: bb1: +; IPSCCP-NEXT: [[C_2:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_2]], label [[BB3]], label [[BB4]] +; IPSCCP: bb2: +; IPSCCP-NEXT: [[C_4:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_4]], label [[BB5]], label [[BB6]] +; IPSCCP: bb3: +; IPSCCP-NEXT: [[P_NEXT]] = add i32 [[P]], 1 +; IPSCCP-NEXT: br label [[LOOP]] +; IPSCCP: bb4: +; IPSCCP-NEXT: [[C_3:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_3]], label [[LOOP]], label [[END:%.*]] +; IPSCCP: bb5: +; IPSCCP-NEXT: br label [[LOOP]] +; IPSCCP: bb6: +; IPSCCP-NEXT: br label [[LOOP]] +; IPSCCP: end: +; IPSCCP-NEXT: ret i32 [[P]] +; +entry: + br label %loop + +loop: + %p = phi i32 [ 0, %entry ], [ %p.next, %bb3 ], [ 0, %bb4 ], [ 0, %bb5], [ 0, %bb6 ] + %c.1 = call i1 @cond() + br i1 %c.1, label %bb1, label %bb2 + +bb1: + %c.2 = call i1 @cond() + br i1 %c.2, label %bb3, label %bb4 + +bb2: + %c.4 = call i1 @cond() + br i1 %c.4, label %bb5, label %bb6 + +bb3: + %p.next = add i32 %p, 1 + br label %loop + +bb4: + %c.3 = call i1 @cond() + br i1 %c.3, label %loop, label %end + +bb5: + br label %loop + +bb6: + br label %loop + +end: + ret i32 %p +} -- 2.7.4