From: Hal Finkel
Date: Fri, 20 Feb 2015 03:05:53 +0000 (+0000)
Subject: [InstCombine] Remove unnecessary variable indexing into single-element arrays
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=847e05f5695870a5adc0bf17d9207ed395d30d02;p=platform%2Fupstream%2Fllvm.git

[InstCombine] Remove unnecessary variable indexing into single-element arrays

This change addresses a deficiency pointed out in PR22629. To copy from the
bug report:

[from the bug report]
Consider this code:

int f(int x) {
  int a[] = {12};
  return a[x];
}

GCC knows to optimize this to:

  movl  $12, %eax
  ret

The code generated by recent Clang at -O3 is:

  movslq  %edi, %rax
  movl  .L_ZZ1fiE1a(,%rax,4), %eax
  retq

.L_ZZ1fiE1a:
  .long 12                      # 0xc
[end from the bug report]

This definitely seems worth fixing. I've also seen this kind of code before
(as the base case of generic vector wrapper templates with one element).

The general idea is to look at the GEP feeding a load or store that has some
variable as its first non-zero index, and to determine whether that index must
be zero because any other value would cause an out-of-bounds access. We can do
this for allocas and for globals with constant initializers, where we know the
maximum size of the underlying object. When we find such a GEP, we create a
new one for the memory access with that first variable index replaced by a
constant zero.

Even when we can't eliminate the memory access itself (and sometimes we
can't), the transformation is still useful because it removes unnecessary
indexing calculations.

llvm-svn: 229959
---

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 1dbe607..1b99d87 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -468,6 +468,181 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI)
   return nullptr;
 }
 
+// If we can determine that all possible objects pointed to by the provided
+// pointer value are not only dereferenceable but also definitively less than
+// or equal to the provided maximum size, then return true. Otherwise, return
+// false. (Constant global values and allocas are the objects handled here.)
+//
+// FIXME: This should probably live in ValueTracking (or similar).
+static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
+                                     const DataLayout *DL) {
+  SmallPtrSet<Value *, 4> Visited;
+  SmallVector<Value *, 4> Worklist(1, V);
+
+  do {
+    Value *P = Worklist.pop_back_val();
+    P = P->stripPointerCasts();
+
+    if (!Visited.insert(P).second)
+      continue;
+
+    if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+      Worklist.push_back(SI->getTrueValue());
+      Worklist.push_back(SI->getFalseValue());
+      continue;
+    }
+
+    if (PHINode *PN = dyn_cast<PHINode>(P)) {
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        Worklist.push_back(PN->getIncomingValue(i));
+      continue;
+    }
+
+    if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
+      if (GA->mayBeOverridden())
+        return false;
+      Worklist.push_back(GA->getAliasee());
+      continue;
+    }
+
+    // If we know how big this object is, and it is no larger than MaxSize,
+    // continue searching. Otherwise, return false.
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
+      if (!AI->getAllocatedType()->isSized())
+        return false;
+
+      ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
+      if (!CS)
+        return false;
+
+      uint64_t TypeSize = DL->getTypeAllocSize(AI->getAllocatedType());
+      // Make sure that, even if the multiplication below would wrap as a
+      // uint64_t, we still do the right thing.
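+      // For example, an alloca of i32 (TypeSize == 4) with an array size of
+      // 2^62 would make the 64-bit product wrap to 0 and incorrectly compare
+      // as no greater than MaxSize; doing the multiplication in 128 bits
+      // keeps the comparison meaningful.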
+      if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
+        return false;
+      continue;
+    }
+
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+      if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
+        return false;
+
+      uint64_t InitSize = DL->getTypeAllocSize(GV->getType()->getElementType());
+      if (InitSize > MaxSize)
+        return false;
+      continue;
+    }
+
+    return false;
+  } while (!Worklist.empty());
+
+  return true;
+}
+
+// If we're indexing into an object of a known size, and the outer index is
+// not a constant, but having any value but zero would lead to undefined
+// behavior, replace it with zero.
+//
+// For example, if we have:
+//   @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+//   ...
+//   %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
+//   ... = load i32* %arrayidx, align 4
+// then we know that we can replace %x in the GEP with i64 0.
+//
+// FIXME: We could fold any GEP index to zero that would cause UB if it were
+// not zero. Currently, we only handle the first such index. We could also
+// search through non-zero constant indices if we kept track of the offsets
+// those indices implied.
+static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
+                                     Instruction *MemI, unsigned &Idx) {
+  const DataLayout *DL = IC.getDataLayout();
+  if (GEPI->getNumOperands() < 2 || !DL)
+    return false;
+
+  // Find the first non-zero index of a GEP. If all indices are zero, return
+  // one past the last index.
+  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
+    unsigned I = 1;
+    for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
+      Value *V = GEPI->getOperand(I);
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+        if (CI->isZero())
+          continue;
+
+      break;
+    }
+
+    return I;
+  };
+
+  // Skip through initial 'zero' indices, and find the corresponding pointer
+  // type. See if the next index is not a constant.
+  Idx = FirstNZIdx(GEPI);
+  if (Idx == GEPI->getNumOperands())
+    return false;
+  if (isa<Constant>(GEPI->getOperand(Idx)))
+    return false;
+
+  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
+  Type *AllocTy =
+    GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops);
+  if (!AllocTy || !AllocTy->isSized())
+    return false;
+  uint64_t TyAllocSize = DL->getTypeAllocSize(AllocTy);
+
+  // If there are more indices after the one we might replace with a zero, make
+  // sure they're all non-negative. If any of them are negative, the overall
+  // address being computed might be before the base address determined by the
+  // first non-zero index.
+  auto IsAllNonNegative = [&]() {
+    for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
+      bool KnownNonNegative, KnownNegative;
+      IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
+                        KnownNegative, 0, MemI);
+      if (KnownNonNegative)
+        continue;
+      return false;
+    }
+
+    return true;
+  };
+
+  // FIXME: If the GEP is not inbounds, and there are extra indices after the
+  // one we'll replace, those could cause the address computation to wrap
+  // (rendering the IsAllNonNegative() check below insufficient). We can do
+  // better, ignoring zero indices (and other indices we can prove small
+  // enough not to wrap).
+  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
+    return false;
+
+  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
+  // also known to be dereferenceable.
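+  // For the @f.a example above, the type reached at the variable index is
+  // i32, so TyAllocSize is 4; @f.a's constant initializer is exactly 4 bytes,
+  // so any non-zero index would address past the end of the object, and the
+  // index must therefore be zero.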
+  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
+         IsAllNonNegative();
+}
+
+// If we're indexing into an object with a variable index for the memory
+// access, but the object has only one element, we can assume that the index
+// will always be zero. If we replace the GEP, return it.
+template <typename T>
+static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
+                                          T &MemI) {
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
+    unsigned Idx;
+    if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
+      Instruction *NewGEPI = GEPI->clone();
+      NewGEPI->setOperand(Idx,
+        ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
+      NewGEPI->insertBefore(GEPI);
+      MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
+      return NewGEPI;
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   Value *Op = LI.getOperand(0);
 
@@ -489,6 +664,12 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
     LI.setAlignment(EffectiveLoadAlign);
   }
 
+  // Replace GEP indices if possible.
+  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
+    Worklist.Add(NewGEPI);
+    return &LI;
+  }
+
   // None of the following transforms are legal for volatile/atomic loads.
   // FIXME: Some of it is okay for atomic loads; needs refactoring.
   if (!LI.isSimple()) return nullptr;
@@ -663,6 +844,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
     SI.setAlignment(EffectiveStoreAlign);
   }
 
+  // Replace GEP indices if possible.
+  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
+    Worklist.Add(NewGEPI);
+    return &SI;
+  }
+
   // Don't hack volatile/atomic stores.
   // FIXME: Some bits are legal for atomic stores; needs refactoring.
   if (!SI.isSimple()) return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index 9810026..40673a7 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -230,7 +230,7 @@ define i1 @test10_struct(i32 %x) {
 ; NODL: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
 
 ; P32-LABEL: @test10_struct(
-; P32: getelementptr inbounds %Foo* @GS, i32 %x, i32 0
+; P32: ret i1 false
   %p = getelementptr inbounds %Foo* @GS, i32 %x, i32 0
   %q = load i32* %p
   %r = icmp eq i32 %q, 9
@@ -256,8 +256,7 @@ define i1 @test10_struct_i16(i16 %x){
 ; NODL: getelementptr inbounds %Foo* @GS, i16 %x, i32 0
 
 ; P32-LABEL: @test10_struct_i16(
-; P32: %1 = sext i16 %x to i32
-; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+; P32: ret i1 false
   %p = getelementptr inbounds %Foo* @GS, i16 %x, i32 0
   %q = load i32* %p
   %r = icmp eq i32 %q, 0
@@ -271,8 +270,7 @@ define i1 @test10_struct_i64(i64 %x){
 ; NODL: getelementptr inbounds %Foo* @GS, i64 %x, i32 0
 
 ; P32-LABEL: @test10_struct_i64(
-; P32: %1 = trunc i64 %x to i32
-; P32: getelementptr inbounds %Foo* @GS, i32 %1, i32 0
+; P32: ret i1 false
   %p = getelementptr inbounds %Foo* @GS, i64 %x, i32 0
   %q = load i32* %p
   %r = icmp eq i32 %q, 0
diff --git a/llvm/test/Transforms/InstCombine/mem-gep-zidx.ll b/llvm/test/Transforms/InstCombine/mem-gep-zidx.ll
new file mode 100644
index 0000000..9141d99
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/mem-gep-zidx.ll
@@ -0,0 +1,48 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+@f.b = private unnamed_addr constant [1 x i32] [i32 55], align 4
+
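+; test1: a load through a variable index into the single-element constant
+; array @f.a. The index must be zero for the inbounds GEP to stay within the
+; 4-byte object, so the load should fold to the initializer value, 12.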
+define signext i32 @test1(i32 signext %x) #0 {
+entry:
+  %idxprom = sext i32 %x to i64
+  %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @test1
+; CHECK: ret i32 12
+}
+
+declare void @foo(i64* %p)
+define void @test2(i32 signext %x, i64 %v) #0 {
+entry:
+  %p = alloca i64
+  %idxprom = sext i32 %x to i64
+  %arrayidx = getelementptr inbounds i64* %p, i64 %idxprom
+  store i64 %v, i64* %arrayidx
+  call void @foo(i64* %p)
+  ret void
+
+; CHECK-LABEL: @test2
+; CHECK: %p = alloca i64
+; CHECK: store i64 %v, i64* %p
+; CHECK: ret void
+}
+
+define signext i32 @test3(i32 signext %x, i1 %y) #0 {
+entry:
+  %idxprom = sext i32 %x to i64
+  %p = select i1 %y, [1 x i32]* @f.a, [1 x i32]* @f.b
+  %arrayidx = getelementptr inbounds [1 x i32]* %p, i64 0, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @test3
+; CHECK: getelementptr inbounds [1 x i32]* %p, i64 0, i64 0
+}
+
+attributes #0 = { nounwind readnone }
+
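
For illustration, on the test1 IR above the new code clones the GEP with the
variable index replaced by zero and re-points the load at the clone (a
sketch; value names are illustrative, and later folds in the same InstCombine
run finish the job):

  ; before
  %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %idxprom
  %0 = load i32* %arrayidx, align 4

  ; after replaceGEPIdxWithZero: the variable index is known to be zero
  %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 0
  %0 = load i32* %arrayidx, align 4

The zero-index GEP then folds away, and the load of @f.a's constant
initializer folds to 12, matching the CHECK line in test1.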