From: Max Kazantsev Date: Mon, 20 Feb 2023 10:24:49 +0000 (+0700) Subject: [LICM] Simplify (X < A && X < B) into (X < MIN(A, B)) if MIN(A, B) is loop-invariant X-Git-Tag: upstream/17.0.6~15269 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6b03ce374e0dc64868b4b6665056dfc3fda0e98f;p=platform%2Fupstream%2Fllvm.git [LICM] Simplify (X < A && X < B) into (X < MIN(A, B)) if MIN(A, B) is loop-invariant We don't do this transform in InstCombine in general case for arbitrary values, because cost of AND and 2 ICMP's isn't higher than of MIN and ICMP. However, LICM also has a notion about the loop structure. This transform becomes profitable if `A` and `B` are loop-invariant and `X` is not: by doing this, we can compute min outside the loop. Differential Revision: https://reviews.llvm.org/D143726 Reviewed By: nikic --- diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 376f20a..78b05ba 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -68,6 +68,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PatternMatch.h" @@ -102,6 +103,8 @@ STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); STATISTIC(NumPromotionCandidates, "Number of promotion candidates"); STATISTIC(NumLoadPromoted, "Number of load-only promotions"); STATISTIC(NumLoadStorePromoted, "Number of load and store promotions"); +STATISTIC(NumMinMaxHoisted, + "Number min/max expressions hoisted out of the loop"); /// Memory promotion is enabled by default. 
static cl::opt<bool> @@ -167,6 +170,11 @@ static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU, bool InvariantGroup); static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU); +/// Try to simplify things like (X < INV_1 AND X < INV_2) into (X < +/// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their +/// minimum can be computed outside of the loop, and X is not a loop-invariant. +static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, + MemorySSAUpdater &MSSAU); static Instruction *cloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU); @@ -981,6 +989,15 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, } } + // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them + // into (x < min(INV1, INV2)), and hoisting the invariant part of this + // expression out of the loop. + if (hoistMinMax(I, *CurLoop, *SafetyInfo, MSSAU)) { + ++NumMinMaxHoisted; + Changed = true; + continue; + } + // Remember possibly hoistable branches so we can actually hoist them // later if needed. 
if (BranchInst *BI = dyn_cast<BranchInst>(&I)) @@ -2396,6 +2413,68 @@ bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) { return false; } +static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, + MemorySSAUpdater &MSSAU) { + auto *Preheader = L.getLoopPreheader(); + assert(Preheader && "Loop is not in simplify form?"); + bool Inverse = false; + using namespace PatternMatch; + Value *Cond1, *Cond2; + if (match(&I, m_Or(m_Value(Cond1), m_Value(Cond2)))) + Inverse = true; + else if (!match(&I, m_And(m_Value(Cond1), m_Value(Cond2)))) + return false; + + auto MatchICmpAgainstInvariant = [&](Value *C, ICmpInst::Predicate &P, + Value *&LHS, Value *&RHS) { + if (!match(C, m_OneUse(m_ICmp(P, m_Value(LHS), m_Value(RHS))))) + return false; + if (!ICmpInst::isRelational(P)) + return false; + if (L.isLoopInvariant(LHS)) { + std::swap(LHS, RHS); + P = ICmpInst::getSwappedPredicate(P); + } + if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS)) + return false; + if (Inverse) + P = ICmpInst::getInversePredicate(P); + return true; + }; + ICmpInst::Predicate P1, P2; + Value *LHS1, *LHS2, *RHS1, *RHS2; + if (!MatchICmpAgainstInvariant(Cond1, P1, LHS1, RHS1) || + !MatchICmpAgainstInvariant(Cond2, P2, LHS2, RHS2)) + return false; + if (P1 != P2 || LHS1 != LHS2) + return false; + + // Everything is fine, we can do the transform. + bool UseMin = ICmpInst::isLT(P1) || ICmpInst::isLE(P1); + assert( + (UseMin || ICmpInst::isGT(P1) || ICmpInst::isGE(P1)) && + "Relational predicate is either less (or equal) or greater (or equal)!"); + Intrinsic::ID id = ICmpInst::isSigned(P1) + ? (UseMin ? Intrinsic::smin : Intrinsic::smax) + : (UseMin ? Intrinsic::umin : Intrinsic::umax); + IRBuilder<> Builder(Preheader->getTerminator()); + Value *NewRHS = Builder.CreateBinaryIntrinsic( + id, RHS1, RHS2, nullptr, StringRef("invariant.") + + (ICmpInst::isSigned(P1) ? "s" : "u") + + (UseMin ? 
"min" : "max")); + Builder.SetInsertPoint(&I); + ICmpInst::Predicate P = P1; + if (Inverse) + P = ICmpInst::getInversePredicate(P); + Value *NewCond = Builder.CreateICmp(P, LHS1, NewRHS); + NewCond->takeName(&I); + I.replaceAllUsesWith(NewCond); + eraseInstruction(I, SafetyInfo, MSSAU); + eraseInstruction(*cast<Instruction>(Cond1), SafetyInfo, MSSAU); + eraseInstruction(*cast<Instruction>(Cond2), SafetyInfo, MSSAU); + return true; +} + /// Little predicate that returns true if the specified basic block is in /// a subloop of the current one, not the current one itself. /// diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index e8ac9a1..9d06212 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -521,40 +521,37 @@ define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) # ; GFX1032-NEXT: s_cbranch_execz .LBB11_6 ; GFX1032-NEXT: ; %bb.1: ; %.preheader ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: s_mov_b32 s3, 1 -; GFX1032-NEXT: ; implicit-def: $sgpr4 +; GFX1032-NEXT: v_min_u32_e32 v1, 0x100, v0 +; GFX1032-NEXT: v_mov_b32_e32 v2, 0 +; GFX1032-NEXT: s_mov_b32 s4, 0 +; GFX1032-NEXT: ; implicit-def: $sgpr3 ; GFX1032-NEXT: s_branch .LBB11_4 ; GFX1032-NEXT: .LBB11_2: ; %bb8 ; GFX1032-NEXT: ; in Loop: Header=BB11_4 Depth=1 -; GFX1032-NEXT: v_cmp_ge_u32_e32 vcc_lo, s3, v0 -; GFX1032-NEXT: s_cmpk_gt_u32 s3, 0xff -; GFX1032-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032-NEXT: s_cselect_b32 s5, -1, 0 -; GFX1032-NEXT: s_add_i32 s3, s3, 1 -; GFX1032-NEXT: s_or_b32 s5, s5, vcc_lo -; GFX1032-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1032-NEXT: s_add_i32 s4, s4, 1 +; GFX1032-NEXT: global_store_dword v2, v0, s[0:1] +; GFX1032-NEXT: v_cmp_ge_u32_e32 vcc_lo, s4, v1 ; GFX1032-NEXT: s_add_u32 s0, s0, 4 ; GFX1032-NEXT: s_addc_u32 s1, s1, 0 -; GFX1032-NEXT: s_andn2_b32 s4, s4, exec_lo -; GFX1032-NEXT: s_and_b32 s5, s5, exec_lo -; GFX1032-NEXT: s_or_b32 s4, s4, 
s5 +; GFX1032-NEXT: s_andn2_b32 s3, s3, exec_lo +; GFX1032-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; GFX1032-NEXT: s_or_b32 s3, s3, s5 ; GFX1032-NEXT: .LBB11_3: ; %Flow ; GFX1032-NEXT: ; in Loop: Header=BB11_4 Depth=1 -; GFX1032-NEXT: s_and_b32 s5, exec_lo, s4 +; GFX1032-NEXT: s_and_b32 s5, exec_lo, s3 ; GFX1032-NEXT: s_or_b32 s2, s5, s2 ; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s2 ; GFX1032-NEXT: s_cbranch_execz .LBB11_6 ; GFX1032-NEXT: .LBB11_4: ; %bb2 ; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: global_load_dword v2, v1, s[0:1] -; GFX1032-NEXT: s_or_b32 s4, s4, exec_lo +; GFX1032-NEXT: global_load_dword v3, v2, s[0:1] +; GFX1032-NEXT: s_or_b32 s3, s3, exec_lo ; GFX1032-NEXT: s_waitcnt vmcnt(0) -; GFX1032-NEXT: v_cmp_gt_i32_e32 vcc_lo, 11, v2 +; GFX1032-NEXT: v_cmp_gt_i32_e32 vcc_lo, 11, v3 ; GFX1032-NEXT: s_cbranch_vccz .LBB11_2 ; GFX1032-NEXT: ; %bb.5: ; in Loop: Header=BB11_4 Depth=1 -; GFX1032-NEXT: ; implicit-def: $sgpr3 +; GFX1032-NEXT: ; implicit-def: $sgpr4 ; GFX1032-NEXT: ; implicit-def: $sgpr0_sgpr1 ; GFX1032-NEXT: s_branch .LBB11_3 ; GFX1032-NEXT: .LBB11_6: ; %.loopexit @@ -563,28 +560,25 @@ define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) # ; GFX1064-LABEL: test_loop_with_if_else_break: ; GFX1064: ; %bb.0: ; %bb ; GFX1064-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX1064-NEXT: s_mov_b32 s6, 0 ; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB11_6 ; GFX1064-NEXT: ; %bb.1: ; %.preheader ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: s_mov_b32 s6, 1 +; GFX1064-NEXT: v_min_u32_e32 v1, 0x100, v0 +; GFX1064-NEXT: v_mov_b32_e32 v2, 0 ; GFX1064-NEXT: s_mov_b64 s[2:3], 0 ; GFX1064-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX1064-NEXT: s_branch .LBB11_4 ; GFX1064-NEXT: .LBB11_2: ; %bb8 ; GFX1064-NEXT: ; in Loop: Header=BB11_4 Depth=1 -; GFX1064-NEXT: v_cmp_ge_u32_e32 vcc, s6, v0 -; 
GFX1064-NEXT: s_cmpk_gt_u32 s6, 0xff -; GFX1064-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064-NEXT: s_cselect_b64 s[8:9], -1, 0 ; GFX1064-NEXT: s_add_i32 s6, s6, 1 -; GFX1064-NEXT: s_or_b64 s[8:9], s[8:9], vcc -; GFX1064-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1064-NEXT: global_store_dword v2, v0, s[0:1] +; GFX1064-NEXT: v_cmp_ge_u32_e32 vcc, s6, v1 ; GFX1064-NEXT: s_add_u32 s0, s0, 4 ; GFX1064-NEXT: s_addc_u32 s1, s1, 0 ; GFX1064-NEXT: s_andn2_b64 s[4:5], s[4:5], exec -; GFX1064-NEXT: s_and_b64 s[8:9], s[8:9], exec +; GFX1064-NEXT: s_and_b64 s[8:9], vcc, exec ; GFX1064-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX1064-NEXT: .LBB11_3: ; %Flow ; GFX1064-NEXT: ; in Loop: Header=BB11_4 Depth=1 @@ -595,10 +589,10 @@ define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) # ; GFX1064-NEXT: .LBB11_4: ; %bb2 ; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: global_load_dword v2, v1, s[0:1] +; GFX1064-NEXT: global_load_dword v3, v2, s[0:1] ; GFX1064-NEXT: s_or_b64 s[4:5], s[4:5], exec ; GFX1064-NEXT: s_waitcnt vmcnt(0) -; GFX1064-NEXT: v_cmp_gt_i32_e32 vcc, 11, v2 +; GFX1064-NEXT: v_cmp_gt_i32_e32 vcc, 11, v3 ; GFX1064-NEXT: s_cbranch_vccz .LBB11_2 ; GFX1064-NEXT: ; %bb.5: ; in Loop: Header=BB11_4 Depth=1 ; GFX1064-NEXT: ; implicit-def: $sgpr6 diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll index 5b8742b..33dfd22 100644 --- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll +++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll @@ -13,18 +13,16 @@ define void @print_res() nounwind { ; CHECK-NEXT: lwz 3, 0(3) ; CHECK-NEXT: addi 3, 3, -1 ; CHECK-NEXT: clrldi 4, 3, 32 -; CHECK-NEXT: cmplwi 3, 1 -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: iselgt 3, 4, 3 -; CHECK-NEXT: li 4, 2 -; CHECK-NEXT: addi 3, 3, -1 -; CHECK-NEXT: cmpldi 3, 2 -; CHECK-NEXT: isellt 3, 3, 4 +; CHECK-NEXT: cmplwi 3, 3 +; CHECK-NEXT: li 3, 3 +; CHECK-NEXT: isellt 3, 4, 3 +; CHECK-NEXT: li 4, 1 +; CHECK-NEXT: 
cmpldi 3, 1 +; CHECK-NEXT: iselgt 3, 3, 4 ; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: li 5, 0 -; CHECK-NEXT: li 7, -1 ; CHECK-NEXT: mtctr 3 +; CHECK-NEXT: li 7, -1 ; CHECK-NEXT: lbz 5, 0(5) ; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: bdz .LBB0_6 diff --git a/llvm/test/Transforms/LICM/min_max.ll b/llvm/test/Transforms/LICM/min_max.ll index d47f2d1..d9710da 100644 --- a/llvm/test/Transforms/LICM/min_max.ll +++ b/llvm/test/Transforms/LICM/min_max.ll @@ -1,16 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S %s -passes='loop-mssa(licm)' -verify-memoryssa | FileCheck %s -; TODO: turn to %iv >u umax(inv_1, inv_2) and hoist it out of loop. +; turn to %iv >u umax(inv_1, inv_2) and hoist it out of loop. define i32 @test_ugt(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_ugt( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ugt i32 [[IV]], [[INVARIANT_UMAX]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -156,16 +151,15 @@ exit: ret i32 %iv } -; TODO: turn to %iv >=u umax(inv_1, inv_2) and hoist it out of loop. +; turn to %iv >=u umax(inv_1, inv_2) and hoist it out of loop. 
define i32 @test_uge(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_uge( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp uge i32 [[IV]], [[INVARIANT_UMAX]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -187,16 +181,15 @@ exit: ret i32 %iv } -; TODO: turn to %iv >s smax(inv_1, inv_2) and hoist it out of loop. +; turn to %iv >s smax(inv_1, inv_2) and hoist it out of loop. define i32 @test_sgt(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_sgt( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_SMAX]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -218,16 +211,15 @@ exit: ret i32 %iv } -; TODO: turn to %iv >=s smax(inv_1, inv_2) and hoist it out of loop. +; turn to %iv >=s smax(inv_1, inv_2) and hoist it out of loop. 
define i32 @test_sge(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_sge( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp sge i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sge i32 [[IV]], [[INVARIANT_SMAX]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -249,16 +241,15 @@ exit: ret i32 %iv } -; TODO: Turn OR to AND and handle accordingly. +; Turn OR to AND and handle accordingly. define i32 @test_ult_inv(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_ult_inv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[INVARIANT_UMAX]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -280,16 +271,15 @@ exit: ret i32 %iv } -; TODO: Turn OR to AND and handle accordingly. +; Turn OR to AND and handle accordingly. 
define i32 @test_ule_inv(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_ule_inv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ule i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ule i32 [[IV]], [[INVARIANT_UMAX]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -311,16 +301,15 @@ exit: ret i32 %iv } -; TODO: Turn OR to AND and handle accordingly. +; Turn OR to AND and handle accordingly. define i32 @test_slt_inv(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_slt_inv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_SMAX]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -342,16 +331,15 @@ exit: ret i32 %iv } -; TODO: Turn OR to AND and handle accordingly. +; Turn OR to AND and handle accordingly. 
define i32 @test_sle_inv(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_sle_inv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp sle i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sle i32 [[IV]], [[INVARIANT_SMAX]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -373,16 +361,15 @@ exit: ret i32 %iv } -; TODO: Turn OR to AND and handle accordingly. +; Turn OR to AND and handle accordingly. define i32 @test_ugt_inv(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_ugt_inv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ugt i32 [[IV]], [[INVARIANT_UMIN]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -404,16 +391,15 @@ exit: ret i32 %iv } -; TODO: Turn OR to AND and handle accordingly. +; Turn OR to AND and handle accordingly. 
define i32 @test_uge_inv(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_uge_inv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp uge i32 [[IV]], [[INVARIANT_UMIN]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -435,16 +421,15 @@ exit: ret i32 %iv } -; TODO: Turn OR to AND and handle accordingly. +; Turn OR to AND and handle accordingly. define i32 @test_sgt_inv(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_sgt_inv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_SMIN]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -466,16 +451,15 @@ exit: ret i32 %iv } -; TODO: Turn OR to AND and handle accordingly. +; Turn OR to AND and handle accordingly. 
define i32 @test_sge_inv(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_sge_inv( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp sge i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sge i32 [[IV]], [[INVARIANT_SMIN]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -497,16 +481,15 @@ exit: ret i32 %iv } -; TODO: turn to %iv >u umax(inv_1, inv_2) and hoist it out of loop. +; turn to %iv >u umax(inv_1, inv_2) and hoist it out of loop. define i32 @test_ugt_swapped(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_ugt_swapped( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[INV_1:%.*]], [[IV]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[INV_2:%.*]], [[IV]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ugt i32 [[IV]], [[INVARIANT_UMAX]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -652,16 +631,15 @@ exit: ret i32 %iv } -; TODO: turn to %iv >=u umax(inv_1, inv_2) and hoist it out of loop. +; turn to %iv >=u umax(inv_1, inv_2) and hoist it out of loop. 
define i32 @test_uge_swapped(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_uge_swapped( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i32 [[INV_1:%.*]], [[IV]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ule i32 [[INV_2:%.*]], [[IV]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp uge i32 [[IV]], [[INVARIANT_UMAX]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -683,16 +661,15 @@ exit: ret i32 %iv } -; TODO: turn to %iv >s smax(inv_1, inv_2) and hoist it out of loop. +; turn to %iv >s smax(inv_1, inv_2) and hoist it out of loop. define i32 @test_sgt_swapped(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_sgt_swapped( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i32 [[INV_1:%.*]], [[IV]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i32 [[INV_2:%.*]], [[IV]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_SMAX]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -714,16 +691,15 @@ exit: ret i32 %iv } -; TODO: turn to %iv >=s smax(inv_1, inv_2) and hoist it out of loop. +; turn to %iv >=s smax(inv_1, inv_2) and hoist it out of loop. 
define i32 @test_sge_swapped(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_sge_swapped( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i32 [[INV_1:%.*]], [[IV]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp sle i32 [[INV_2:%.*]], [[IV]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sge i32 [[IV]], [[INVARIANT_SMAX]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -1128,16 +1104,15 @@ exit: ret i32 %iv } -; TODO: This can be optimized, despite the fact that loop_cond has other uses. +; This can be optimized, despite the fact that loop_cond has other uses. define i32 @test_ult_extra_use_result_pos(i32 %start, i32 %inv_1, i32 %inv_2) { ; CHECK-LABEL: @test_ult_extra_use_result_pos( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]]) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[IV]], [[INV_1:%.*]] -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[IV]], [[INV_2:%.*]] -; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[INVARIANT_UMIN]] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: