From eeec35ae1cce76d8f0d4fc35d231deebf2fa5f38 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 20 Oct 2014 00:24:14 +0000 Subject: [PATCH] Teach the load analysis driving core instcombine logic and other bits of logic to look through pointer casts, making them trivially stronger in the face of loads and stores with intervening pointer casts. I've included a few test cases that demonstrate the kind of folding instcombine can do without pointer casts and then variations which obfuscate the logic through bitcasts. Without this patch, the variations all fail to optimize fully. This is more important now than it has been in the past as I've started moving the load canonicialization to more closely follow the value type requirements rather than the pointer type requirements and thus this needs to be prepared for more pointer casts. When I made the same change to stores several test cases regressed without logic along these lines so I wanted to systematically improve matters first. llvm-svn: 220178 --- llvm/lib/Analysis/Loads.cpp | 64 ++++++++---- .../InstCombine/InstCombineLoadStoreAlloca.cpp | 3 +- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 11 ++- llvm/test/Transforms/InstCombine/select.ll | 109 +++++++++++++++++++++ 4 files changed, 164 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 4838d85..65a30f2 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -86,6 +86,9 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, } } + PointerType *AddrTy = cast(V->getType()); + uint64_t LoadSize = DL ? DL->getTypeStoreSize(AddrTy->getElementType()) : 0; + // If we found a base allocated type from either an alloca or global variable, // try to see if we are definitively within the allocated region. We need to // know the size of the base type and the loaded type to do anything in this @@ -96,8 +99,6 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, if (Align <= BaseAlign) { // Check if the load is within the bounds of the underlying object. - PointerType *AddrTy = cast(V->getType()); - uint64_t LoadSize = DL->getTypeStoreSize(AddrTy->getElementType()); if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) && (Align == 0 || (ByteOffset % Align) == 0)) return true; @@ -111,6 +112,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, // the load entirely). BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); + // We can at least always strip pointer casts even though we can't use the + // base here. + V = V->stripPointerCasts(); + while (BBI != E) { --BBI; @@ -120,13 +125,25 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, !isa(BBI)) return false; - if (LoadInst *LI = dyn_cast(BBI)) { - if (AreEquivalentAddressValues(LI->getOperand(0), V)) - return true; - } else if (StoreInst *SI = dyn_cast(BBI)) { - if (AreEquivalentAddressValues(SI->getOperand(1), V)) - return true; - } + Value *AccessedPtr; + if (LoadInst *LI = dyn_cast(BBI)) + AccessedPtr = LI->getPointerOperand(); + else if (StoreInst *SI = dyn_cast(BBI)) + AccessedPtr = SI->getPointerOperand(); + else + continue; + + // Handle trivial cases even w/o DataLayout or other work. + if (AccessedPtr == V) + return true; + + if (!DL) + continue; + + auto *AccessedTy = cast(AccessedPtr->getType()); + if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) && + LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType())) + return true; } return false; } @@ -157,12 +174,12 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; + Type *AccessTy = cast(Ptr->getType())->getElementType(); + // If we're using alias analysis to disambiguate get the size of *Ptr. - uint64_t AccessSize = 0; - if (AA) { - Type *AccessTy = cast(Ptr->getType())->getElementType(); - AccessSize = AA->getTypeStoreSize(AccessTy); - } + uint64_t AccessSize = AA ? AA->getTypeStoreSize(AccessTy) : 0; + + Value *StrippedPtr = Ptr->stripPointerCasts(); while (ScanFrom != ScanBB->begin()) { // We must ignore debug info directives when counting (otherwise they @@ -183,17 +200,21 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // (This is true even if the load is volatile or atomic, although // those cases are unlikely.) if (LoadInst *LI = dyn_cast(Inst)) - if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) { + if (AreEquivalentAddressValues( + LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) && + CastInst::isBitCastable(LI->getType(), AccessTy)) { if (AATags) LI->getAAMetadata(*AATags); return LI; } if (StoreInst *SI = dyn_cast(Inst)) { + Value *StorePtr = SI->getPointerOperand()->stripPointerCasts(); // If this is a store through Ptr, the value is available! // (This is true even if the store is volatile or atomic, although // those cases are unlikely.) - if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) { + if (AreEquivalentAddressValues(StorePtr, StrippedPtr) && + CastInst::isBitCastable(SI->getValueOperand()->getType(), AccessTy)) { if (AATags) SI->getAAMetadata(*AATags); return SI->getOperand(0); @@ -202,15 +223,15 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // If Ptr is an alloca and this is a store to a different alloca, ignore // the store. This is a trivial form of alias analysis that is important // for reg2mem'd code. - if ((isa(Ptr) || isa(Ptr)) && - (isa(SI->getOperand(1)) || - isa(SI->getOperand(1)))) + if ((isa(StrippedPtr) || isa(StrippedPtr)) && + (isa(StorePtr) || isa(StorePtr))) continue; // If we have alias analysis and it says the store won't modify the loaded // value, ignore the store. if (AA && - (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) + (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & + AliasAnalysis::Mod) == 0) continue; // Otherwise the store that may or may not alias the pointer, bail out. @@ -223,7 +244,8 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // If alias analysis claims that it really won't modify the load, // ignore it. if (AA && - (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) + (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & + AliasAnalysis::Mod) == 0) continue; // May modify the pointer, bail out. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 8e13dde..32ac62e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -420,7 +420,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // separated by a few arithmetic operations. BasicBlock::iterator BBI = &LI; if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) - return ReplaceInstUsesWith(LI, AvailableVal); + return ReplaceInstUsesWith( + LI, Builder->CreateBitCast(AvailableVal, LI.getType())); // load(gep null, ...) -> unreachable if (GetElementPtrInst *GEPI = dyn_cast(Op)) { diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 924d4e1..fbb5c20 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -901,6 +901,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If the returned value is the load itself, replace with an undef. This can // only happen in dead loops. if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType()); + if (AvailableVal->getType() != LI->getType()) + AvailableVal = CastInst::Create(CastInst::BitCast, AvailableVal, + LI->getType(), "", LI); LI->replaceAllUsesWith(AvailableVal); LI->eraseFromParent(); return true; @@ -1031,7 +1034,13 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { assert(I != AvailablePreds.end() && I->first == P && "Didn't find entry for predecessor!"); - PN->addIncoming(I->second, I->first); + // If we have an available predecessor but it requires casting, insert the + // cast in the predecessor and use the cast. + Value *PredV = I->second; + if (PredV->getType() != LI->getType()) + PredV = CastInst::Create(CastInst::BitCast, PredV, LI->getType(), "", P); + + PN->addIncoming(PredV, I->first); } //cerr << "PRE: " << *LI << *PN << "\n"; diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 05f65ef..6cf9f0f 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -1276,3 +1276,112 @@ define i32 @test77(i1 %flag, i32* %x) { %v = load i32* %p ret i32 %v } + +define i32 @test78(i1 %flag, i32* %x, i32* %y, i32* %z) { +; Test that we can speculate the loads around the select even when we can't +; fold the load completely away. +; CHECK-LABEL: @test78( +; CHECK: %[[V1:.*]] = load i32* %x +; CHECK-NEXT: %[[V2:.*]] = load i32* %y +; CHECK-NEXT: %[[S:.*]] = select i1 %flag, i32 %[[V1]], i32 %[[V2]] +; CHECK-NEXT: ret i32 %[[S]] +entry: + store i32 0, i32* %x + store i32 0, i32* %y + ; Block forwarding by storing to %z which could alias either %x or %y. + store i32 42, i32* %z + %p = select i1 %flag, i32* %x, i32* %y + %v = load i32* %p + ret i32 %v +} + +define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) { +; Test that we can speculate the loads around the select even when we can't +; fold the load completely away. +; CHECK-LABEL: @test79( +; CHECK: %[[V1:.*]] = load float* %x +; CHECK-NEXT: %[[V2:.*]] = load float* %y +; CHECK-NEXT: %[[S:.*]] = select i1 %flag, float %[[V1]], float %[[V2]] +; CHECK-NEXT: ret float %[[S]] +entry: + %x1 = bitcast float* %x to i32* + %y1 = bitcast i32* %y to float* + store i32 0, i32* %x1 + store i32 0, i32* %y + ; Block forwarding by storing to %z which could alias either %x or %y. + store i32 42, i32* %z + %p = select i1 %flag, float* %x, float* %y1 + %v = load float* %p + ret float %v +} + +define i32 @test80(i1 %flag) { +; Test that when we speculate the loads around the select they fold throug +; load->load folding and load->store folding. +; CHECK-LABEL: @test80( +; CHECK: %[[X:.*]] = alloca i32 +; CHECK-NEXT: %[[Y:.*]] = alloca i32 +; CHECK: %[[V:.*]] = load i32* %[[X]] +; CHECK-NEXT: store i32 %[[V]], i32* %[[Y]] +; CHECK-NEXT: ret i32 %[[V]] +entry: + %x = alloca i32 + %y = alloca i32 + call void @scribble_on_memory(i32* %x) + call void @scribble_on_memory(i32* %y) + %tmp = load i32* %x + store i32 %tmp, i32* %y + %p = select i1 %flag, i32* %x, i32* %y + %v = load i32* %p + ret i32 %v +} + +define float @test81(i1 %flag) { +; Test that we can speculate the load around the select even though they use +; differently typed pointers. +; CHECK-LABEL: @test81( +; CHECK: %[[X:.*]] = alloca i32 +; CHECK-NEXT: %[[Y:.*]] = alloca i32 +; CHECK: %[[V:.*]] = load i32* %[[X]] +; CHECK-NEXT: store i32 %[[V]], i32* %[[Y]] +; CHECK-NEXT: %[[C:.*]] = bitcast i32 %[[V]] to float +; CHECK-NEXT: ret float %[[C]] +entry: + %x = alloca float + %y = alloca i32 + %x1 = bitcast float* %x to i32* + %y1 = bitcast i32* %y to float* + call void @scribble_on_memory(i32* %x1) + call void @scribble_on_memory(i32* %y) + %tmp = load i32* %x1 + store i32 %tmp, i32* %y + %p = select i1 %flag, float* %x, float* %y1 + %v = load float* %p + ret float %v +} + +define i32 @test82(i1 %flag) { +; Test that we can speculate the load around the select even though they use +; differently typed pointers. +; CHECK-LABEL: @test82( +; CHECK: %[[X:.*]] = alloca float +; CHECK-NEXT: %[[Y:.*]] = alloca i32 +; CHECK-NEXT: %[[X1:.*]] = bitcast float* %[[X]] to i32* +; CHECK-NEXT: %[[Y1:.*]] = bitcast i32* %[[Y]] to float* +; CHECK: %[[V:.*]] = load float* %[[X]] +; CHECK-NEXT: store float %[[V]], float* %[[Y1]] +; CHECK-NEXT: %[[C:.*]] = bitcast float %[[V]] to i32 +; CHECK-NEXT: ret i32 %[[C]] +entry: + %x = alloca float + %y = alloca i32 + %x1 = bitcast float* %x to i32* + %y1 = bitcast i32* %y to float* + call void @scribble_on_memory(i32* %x1) + call void @scribble_on_memory(i32* %y) + %tmp = load float* %x + store float %tmp, float* %y1 + %p = select i1 %flag, i32* %x1, i32* %y + %v = load i32* %p + ret i32 %v +} -- 2.7.4