From 2e42c34d058e5b068ec1cd6dae6cd388937c6e3c Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 10 Jul 2014 05:27:53 +0000 Subject: [PATCH] Allow isDereferenceablePointer to look through some bitcasts isDereferenceablePointer should not give up upon encountering any bitcast. If we're casting from a pointer to a larger type to a pointer to a small type, we can continue by examining the bitcast's operand. This missing capability was noted in a comment in the function. In order for this to work, isDereferenceablePointer now takes an optional DataLayout pointer (essentially all callers already had such a pointer available). Most code uses isDereferenceablePointer though isSafeToSpeculativelyExecute (which already took an optional DataLayout pointer), and to enable the LICM test case, LICM needs to actually provide its DL pointer to isSafeToSpeculativelyExecute (which it was not doing previously). llvm-svn: 212686 --- llvm/include/llvm/IR/Value.h | 2 +- llvm/lib/Analysis/ValueTracking.cpp | 2 +- llvm/lib/IR/Value.cpp | 31 ++-- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 14 +- llvm/lib/Transforms/Scalar/LICM.cpp | 2 +- llvm/lib/Transforms/Scalar/SROA.cpp | 6 +- .../lib/Transforms/Scalar/ScalarReplAggregates.cpp | 6 +- llvm/test/Transforms/LICM/hoist-bitcast-load.ll | 160 +++++++++++++++++++++ 8 files changed, 202 insertions(+), 21 deletions(-) create mode 100644 llvm/test/Transforms/LICM/hoist-bitcast-load.ll diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h index aae39cc..b5bbc96 100644 --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -430,7 +430,7 @@ public: /// isDereferenceablePointer - Test if this value is always a pointer to /// allocated and suitably aligned memory for a simple load or store. - bool isDereferenceablePointer() const; + bool isDereferenceablePointer(const DataLayout *DL = nullptr) const; /// DoPHITranslation - If this value is a PHI node with CurBB as its parent, /// return the value in the PHI node corresponding to PredBB. If not, return diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index e664454..5264745 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2007,7 +2007,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, // Speculative load may create a race that did not exist in the source. LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) return false; - return LI->getPointerOperand()->isDereferenceablePointer(); + return LI->getPointerOperand()->isDereferenceablePointer(TD); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast(Inst)) { diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index 463024a..d61b8e5 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InstrTypes.h" @@ -472,18 +473,32 @@ Value *Value::stripInBoundsOffsets() { /// isDereferenceablePointer - Test if this value is always a pointer to /// allocated and suitably aligned memory for a simple load or store. -static bool isDereferenceablePointer(const Value *V, +static bool isDereferenceablePointer(const Value *V, const DataLayout *DL, SmallPtrSet &Visited) { // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. - // It's also not always safe to follow a bitcast, for example: - // bitcast i8* (alloca i8) to i32* - // would result in a 4-byte load from a 1-byte alloca. Some cases could - // be handled using DataLayout to check sizes and alignments though. // These are obviously ok. if (isa(V)) return true; + // It's not always safe to follow a bitcast, for example: + // bitcast i8* (alloca i8) to i32* + // would result in a 4-byte load from a 1-byte alloca. However, + // if we're casting from a pointer from a type of larger size + // to a type of smaller size (or the same size), and the alignment + // is at least as large as for the resulting pointer type, then + // we can look through the bitcast. + if (DL) + if (const BitCastInst* BC = dyn_cast(V)) { + Type *STy = BC->getSrcTy()->getPointerElementType(), + *DTy = BC->getDestTy()->getPointerElementType(); + if ((DL->getTypeStoreSize(STy) >= + DL->getTypeStoreSize(DTy)) && + (DL->getABITypeAlignment(STy) >= + DL->getABITypeAlignment(DTy))) + return isDereferenceablePointer(BC->getOperand(0), DL, Visited); + } + // Global variables which can't collapse to null are ok. if (const GlobalVariable *GV = dyn_cast(V)) return !GV->hasExternalWeakLinkage(); @@ -497,7 +512,7 @@ static bool isDereferenceablePointer(const Value *V, // Conservatively require that the base pointer be fully dereferenceable. if (!Visited.insert(GEP->getOperand(0))) return false; - if (!isDereferenceablePointer(GEP->getOperand(0), Visited)) + if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited)) return false; // Check the indices. gep_type_iterator GTI = gep_type_begin(GEP); @@ -533,9 +548,9 @@ static bool isDereferenceablePointer(const Value *V, /// isDereferenceablePointer - Test if this value is always a pointer to /// allocated and suitably aligned memory for a simple load or store. -bool Value::isDereferenceablePointer() const { +bool Value::isDereferenceablePointer(const DataLayout *DL) const { SmallPtrSet Visited; - return ::isDereferenceablePointer(this, Visited); + return ::isDereferenceablePointer(this, DL, Visited); } /// DoPHITranslation - If this value is a PHI node with CurBB as its parent, diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index 97a119b..f9de54a 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" @@ -68,13 +69,14 @@ namespace { bool runOnSCC(CallGraphSCC &SCC) override; static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(ID), maxElements(maxElements) { + : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) { initializeArgPromotionPass(*PassRegistry::getPassRegistry()); } /// A vector used to hold the indices of a single GEP instruction typedef std::vector IndicesVector; + const DataLayout *DL; private: CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; @@ -103,6 +105,9 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { bool Changed = false, LocalChange; + DataLayoutPass *DLP = getAnalysisIfAvailable(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. @@ -218,7 +223,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { /// AllCallersPassInValidPointerForArgument - Return true if we can prove that /// all callees pass in a valid pointer for the specified function argument. -static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { +static bool AllCallersPassInValidPointerForArgument(Argument *Arg, + const DataLayout *DL) { Function *Callee = Arg->getParent(); unsigned ArgNo = Arg->getArgNo(); @@ -229,7 +235,7 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { CallSite CS(U); assert(CS && "Should only have direct calls!"); - if (!CS.getArgument(ArgNo)->isDereferenceablePointer()) + if (!CS.getArgument(ArgNo)->isDereferenceablePointer(DL)) return false; } return true; @@ -337,7 +343,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. - if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg)) + if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index bc1db37..abcceb2 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -639,7 +639,7 @@ void LICM::hoist(Instruction &I) { /// bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { // If it is not a trapping instruction, it is always safe to hoist. - if (isSafeToSpeculativelyExecute(&Inst)) + if (isSafeToSpeculativelyExecute(&Inst, DL)) return true; return isGuaranteedToExecute(Inst); diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 6532b7a..8c7f253 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1130,7 +1130,7 @@ static bool isSafePHIToSpeculate(PHINode &PN, // If this pointer is always safe to load, or if we can prove that there // is already a load in the block, then we can move the load to the pred // block. - if (InVal->isDereferenceablePointer() || + if (InVal->isDereferenceablePointer(DL) || isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL)) continue; @@ -1198,8 +1198,8 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = nullptr) { Value *TValue = SI.getTrueValue(); Value *FValue = SI.getFalseValue(); - bool TDerefable = TValue->isDereferenceablePointer(); - bool FDerefable = FValue->isDereferenceablePointer(); + bool TDerefable = TValue->isDereferenceablePointer(DL); + bool FDerefable = FValue->isDereferenceablePointer(DL); for (User *U : SI.users()) { LoadInst *LI = dyn_cast(U); diff --git a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 58192fc..e2a24a7 100644 --- a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1142,8 +1142,8 @@ public: /// We can do this to a select if its only uses are loads and if the operand to /// the select can be loaded unconditionally. static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) { - bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(); - bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(); + bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(DL); + bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(DL); for (User *U : SI->users()) { LoadInst *LI = dyn_cast(U); @@ -1226,7 +1226,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) { // If this pointer is always safe to load, or if we can prove that there is // already a load in the block, then we can move the load to the pred block. - if (InVal->isDereferenceablePointer() || + if (InVal->isDereferenceablePointer(DL) || isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL)) continue; diff --git a/llvm/test/Transforms/LICM/hoist-bitcast-load.ll b/llvm/test/Transforms/LICM/hoist-bitcast-load.ll new file mode 100644 index 0000000..bb105e5 --- /dev/null +++ b/llvm/test/Transforms/LICM/hoist-bitcast-load.ll @@ -0,0 +1,160 @@ +; RUN: opt -S -basicaa -licm < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Make sure the basic alloca pointer hoisting works: +; CHECK-LABEL: @test1 +; CHECK: load i32* %c, align 4 +; CHECK: for.body: + +; Function Attrs: nounwind uwtable +define void @test1(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %c = alloca i32 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +; Make sure the basic alloca pointer hoisting works through a bitcast to a +; pointer to a smaller type: +; CHECK-LABEL: @test2 +; CHECK: load i32* %c, align 4 +; CHECK: for.body: + +; Function Attrs: nounwind uwtable +define void @test2(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %ca = alloca i64 + %c = bitcast i64* %ca to i32* + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +; Make sure the basic alloca pointer hoisting works through a bitcast to a +; pointer to a smaller type (where the bitcast also needs to be hoisted): +; CHECK-LABEL: @test3 +; CHECK: load i32* %c, align 4 +; CHECK: for.body: + +; Function Attrs: nounwind uwtable +define void @test3(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %ca = alloca i64 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %c = bitcast i64* %ca to i32* + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +; Make sure the basic alloca pointer hoisting does not happen through a bitcast +; to a pointer to a larger type: +; CHECK-LABEL: @test4 +; CHECK: for.body: +; CHECK: load i32* %c, align 4 + +; Function Attrs: nounwind uwtable +define void @test4(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %ca = alloca i16 + %c = bitcast i16* %ca to i32* + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +attributes #0 = { nounwind uwtable } + -- 2.7.4