From: Nikita Popov Date: Fri, 24 Mar 2023 14:10:53 +0000 (+0100) Subject: [LICM] Reassociate GEPs to allow hoisting X-Git-Tag: upstream/17.0.6~12077 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b8917ac62ad49a0ce6de026c086599fc5fa35566;p=platform%2Fupstream%2Fllvm.git [LICM] Reassociate GEPs to allow hoisting Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if this would make the inner GEP loop invariant and thus hoistable. This is intended to replace an InstCombine fold that does this (in https://github.com/llvm/llvm-project/blob/04f61fb73dc6a994ab267d431f2fdaedc67430ff/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp#L2006). The problem with the InstCombine fold is that LoopInfo is an optional dependency, so it is not performed reliably. Differential Revision: https://reviews.llvm.org/D146813 --- diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 34b3c1a..8013bb8 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -173,7 +173,8 @@ static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, /// Aggregates various functions for hoisting computations out of loop. static bool hoistArithmetics(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater &MSSAU); + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT); /// Try to simplify things like (A < INV_1 AND icmp A < INV_2) into (A < /// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their /// minimun can be computed outside of loop, and X is not a loop-invariant. @@ -989,7 +990,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, // Try to reassociate instructions so that part of computations can be // done out of loop. - if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU)) { + if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) { Changed = true; continue; } @@ -2495,9 +2496,57 @@ static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, return true; } +/// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if +/// this allows hoisting the inner GEP. +static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, + MemorySSAUpdater &MSSAU, AssumptionCache *AC, + DominatorTree *DT) { + auto *GEP = dyn_cast(&I); + if (!GEP) + return false; + + auto *Src = dyn_cast(GEP->getPointerOperand()); + if (!Src || !Src->hasOneUse() || !L.contains(Src)) + return false; + + Value *SrcPtr = Src->getPointerOperand(); + auto LoopInvariant = [&](Value *V) { return L.isLoopInvariant(V); }; + if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant)) + return false; + + assert(!all_of(Src->indices(), LoopInvariant) && + "Would have been hoisted already"); + + // The swapped GEPs are inbounds if both original GEPs are inbounds + // and the sign of the offsets is the same. For simplicity, only + // handle both offsets being non-negative. + const DataLayout &DL = GEP->getModule()->getDataLayout(); + auto NonNegative = [&](Value *V) { + return isKnownNonNegative(V, DL, 0, AC, GEP, DT); + }; + bool IsInBounds = Src->isInBounds() && GEP->isInBounds() && + all_of(Src->indices(), NonNegative) && + all_of(GEP->indices(), NonNegative); + + BasicBlock *Preheader = L.getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); + Value *NewSrc = Builder.CreateGEP(GEP->getSourceElementType(), SrcPtr, + SmallVector(GEP->indices()), + "invariant.gep", IsInBounds); + Builder.SetInsertPoint(GEP); + Value *NewGEP = Builder.CreateGEP(Src->getSourceElementType(), NewSrc, + SmallVector(Src->indices()), "gep", + IsInBounds); + GEP->replaceAllUsesWith(NewGEP); + eraseInstruction(*GEP, SafetyInfo, MSSAU); + eraseInstruction(*Src, SafetyInfo, MSSAU); + return true; +} + static bool hoistArithmetics(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater &MSSAU) { + MemorySSAUpdater &MSSAU, + AssumptionCache *AC, DominatorTree *DT) { // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them // into (x < min(INV1, INV2)), and hoisting the invariant part of this // expression out of the loop. @@ -2505,6 +2554,13 @@ static bool hoistArithmetics(Instruction &I, Loop &L, ++NumMinMaxHoisted; return true; } + + // Try to hoist GEPs by reassociation. + if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) { + ++NumHoisted; + return true; + } + return false; } diff --git a/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll b/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll index 3d3e10d..18896c2 100644 --- a/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll +++ b/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll @@ -41,10 +41,9 @@ define signext i32 @test(ptr noalias %PtrA, ptr noalias %PtrB, i32 signext %LenA ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_4: # %if.end9 ; CHECK-NEXT: # -; CHECK-NEXT: add 9, 3, 9 -; CHECK-NEXT: lwz 10, 4(9) +; CHECK-NEXT: lwzx 10, 7, 9 ; CHECK-NEXT: addi 10, 10, 1 -; CHECK-NEXT: stw 10, 4(9) +; CHECK-NEXT: stwx 10, 7, 9 ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_5: # %if.then ; CHECK-NEXT: lwax 3, 9, 3 diff --git a/llvm/test/Transforms/LICM/gep-reassociate.ll b/llvm/test/Transforms/LICM/gep-reassociate.ll index 4283eb1..8532974 100644 --- a/llvm/test/Transforms/LICM/gep-reassociate.ll +++ b/llvm/test/Transforms/LICM/gep-reassociate.ll @@ -11,13 +11,13 @@ define void @only_one_inbounds(ptr %ptr, i1 %c, i32 %arg) { ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i32 [[ARG:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARG_EXT:%.*]] = zext i32 [[ARG]] to i64 +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[ARG_EXT]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() ; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i32 [[VAL]] to i64 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[VAL_EXT]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, ptr [[PTR2]], i64 [[ARG_EXT]] -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[VAL_EXT]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -42,13 +42,13 @@ define void @both_inbounds_one_neg(ptr %ptr, i1 %c) { ; CHECK-LABEL: define void @both_inbounds_one_neg ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 -1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() ; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i32 [[VAL]] to i64 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[VAL_EXT]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i8, ptr [[PTR2]], i64 -1 -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[VAL_EXT]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -72,13 +72,13 @@ define void @both_inbounds_pos(ptr %ptr, i1 %c) { ; CHECK-LABEL: define void @both_inbounds_pos ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() ; CHECK-NEXT: [[VAL_EXT:%.*]] = zext i32 [[VAL]] to i64 -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[VAL_EXT]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR2]], i64 1 -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[INVARIANT_GEP]], i64 [[VAL_EXT]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -102,12 +102,12 @@ define void @different_elem_types(ptr %ptr, i1 %c, i64 %arg) { ; CHECK-LABEL: define void @different_elem_types ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i64, ptr [[PTR]], i64 [[ARG]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64() -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[VAL]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i64, ptr [[PTR2]], i64 [[ARG]] -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[VAL]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -126,6 +126,62 @@ exit: ret void } +define void @different_index_types(ptr %ptr, i1 %c, i32 %arg) { +; CHECK-LABEL: define void @different_index_types +; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i32 [[ARG:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[ARG]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64() +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[VAL]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %val = call i64 @get.i64() + %ptr2 = getelementptr i8, ptr %ptr, i64 %val + %ptr3 = getelementptr i8, ptr %ptr2, i32 %arg + call void @use(ptr %ptr3) + br i1 %c, label %loop, label %exit + +exit: + ret void +} + +define void @different_index_count(ptr %ptr, i1 %c, i64 %arg1, i64 %arg2) { +; CHECK-LABEL: define void @different_index_count +; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG1:%.*]], i64 [[ARG2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG1]], i64 [[ARG2]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64() +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[VAL]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %val = call i64 @get.i64() + %ptr2 = getelementptr i8, ptr %ptr, i64 %val + %ptr3 = getelementptr [0 x i8], ptr %ptr2, i64 %arg1, i64 %arg2 + call void @use(ptr %ptr3) + br i1 %c, label %loop, label %exit + +exit: + ret void +} + define void @src_has_extra_use(ptr %ptr, i1 %c, i64 %arg) { ; CHECK-LABEL: define void @src_has_extra_use ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG:%.*]]) { @@ -246,13 +302,13 @@ define void @multiple_indices(ptr %ptr, i1 %c, i64 %arg1, i64 %arg2) { ; CHECK-LABEL: define void @multiple_indices ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG1:%.*]], i64 [[ARG2:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG1]], i64 [[ARG2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL1:%.*]] = call i64 @get.i64() ; CHECK-NEXT: [[VAL2:%.*]] = call i64 @get.i64() -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[VAL1]], i64 [[VAL2]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr [0 x i8], ptr [[PTR2]], i64 [[ARG1]], i64 [[ARG2]] -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [0 x i8], ptr [[INVARIANT_GEP]], i64 [[VAL1]], i64 [[VAL2]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -308,12 +364,12 @@ define void @multiple_indices_very_invariant(ptr %ptr, i1 %c, i64 %arg1, i64 %ar ; CHECK-LABEL: define void @multiple_indices_very_invariant ; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[ARG1:%.*]], i64 [[ARG2:%.*]], i64 [[ARG3:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG1]], i64 [[ARG2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL1:%.*]] = call i64 @get.i64() -; CHECK-NEXT: [[PTR2:%.*]] = getelementptr [0 x i8], ptr [[PTR]], i64 [[ARG3]], i64 [[VAL1]] -; CHECK-NEXT: [[PTR3:%.*]] = getelementptr [0 x i8], ptr [[PTR2]], i64 [[ARG1]], i64 [[ARG2]] -; CHECK-NEXT: call void @use(ptr [[PTR3]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [0 x i8], ptr [[INVARIANT_GEP]], i64 [[ARG3]], i64 [[VAL1]] +; CHECK-NEXT: call void @use(ptr [[GEP]]) ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void