}
}
+ PointerType *AddrTy = cast<PointerType>(V->getType());
+ uint64_t LoadSize = DL ? DL->getTypeStoreSize(AddrTy->getElementType()) : 0;
+
// If we found a base allocated type from either an alloca or global variable,
// try to see if we are definitively within the allocated region. We need to
// know the size of the base type and the loaded type to do anything in this
if (Align <= BaseAlign) {
// Check if the load is within the bounds of the underlying object.
- PointerType *AddrTy = cast<PointerType>(V->getType());
- uint64_t LoadSize = DL->getTypeStoreSize(AddrTy->getElementType());
if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) &&
(Align == 0 || (ByteOffset % Align) == 0))
return true;
// the load entirely).
BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+ // We can at least always strip pointer casts even though we can't use the
+ // base here.
+ V = V->stripPointerCasts();
+
while (BBI != E) {
--BBI;
!isa<DbgInfoIntrinsic>(BBI))
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
- if (AreEquivalentAddressValues(LI->getOperand(0), V))
- return true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
- if (AreEquivalentAddressValues(SI->getOperand(1), V))
- return true;
- }
+ Value *AccessedPtr;
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+ AccessedPtr = LI->getPointerOperand();
+ else if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ AccessedPtr = SI->getPointerOperand();
+ else
+ continue;
+
+ // Handle trivial cases even w/o DataLayout or other work.
+ if (AccessedPtr == V)
+ return true;
+
+ if (!DL)
+ continue;
+
+ auto *AccessedTy = cast<PointerType>(AccessedPtr->getType());
+ if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
+ LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType()))
+ return true;
}
return false;
}
if (MaxInstsToScan == 0)
MaxInstsToScan = ~0U;
+ Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+
// If we're using alias analysis to disambiguate get the size of *Ptr.
- uint64_t AccessSize = 0;
- if (AA) {
- Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
- AccessSize = AA->getTypeStoreSize(AccessTy);
- }
+ uint64_t AccessSize = AA ? AA->getTypeStoreSize(AccessTy) : 0;
+
+ Value *StrippedPtr = Ptr->stripPointerCasts();
while (ScanFrom != ScanBB->begin()) {
// We must ignore debug info directives when counting (otherwise they
// (This is true even if the load is volatile or atomic, although
// those cases are unlikely.)
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) {
+ if (AreEquivalentAddressValues(
+ LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
+ CastInst::isBitCastable(LI->getType(), AccessTy)) {
if (AATags)
LI->getAAMetadata(*AATags);
return LI;
}
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
// If this is a store through Ptr, the value is available!
// (This is true even if the store is volatile or atomic, although
// those cases are unlikely.)
- if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) {
+ if (AreEquivalentAddressValues(StorePtr, StrippedPtr) &&
+ CastInst::isBitCastable(SI->getValueOperand()->getType(), AccessTy)) {
if (AATags)
SI->getAAMetadata(*AATags);
return SI->getOperand(0);
// If Ptr is an alloca and this is a store to a different alloca, ignore
// the store. This is a trivial form of alias analysis that is important
// for reg2mem'd code.
- if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
- (isa<AllocaInst>(SI->getOperand(1)) ||
- isa<GlobalVariable>(SI->getOperand(1))))
+ if ((isa<AllocaInst>(StrippedPtr) || isa<GlobalVariable>(StrippedPtr)) &&
+ (isa<AllocaInst>(StorePtr) || isa<GlobalVariable>(StorePtr)))
continue;
// If we have alias analysis and it says the store won't modify the loaded
// value, ignore the store.
if (AA &&
- (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ (AA->getModRefInfo(SI, StrippedPtr, AccessSize) &
+ AliasAnalysis::Mod) == 0)
continue;
// Otherwise the store that may or may not alias the pointer, bail out.
// If alias analysis claims that it really won't modify the load,
// ignore it.
if (AA &&
- (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) &
+ AliasAnalysis::Mod) == 0)
continue;
// May modify the pointer, bail out.
// separated by a few arithmetic operations.
BasicBlock::iterator BBI = &LI;
if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
- return ReplaceInstUsesWith(LI, AvailableVal);
+ return ReplaceInstUsesWith(
+ LI, Builder->CreateBitCast(AvailableVal, LI.getType()));
// load(gep null, ...) -> unreachable
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
// If the returned value is the load itself, replace with an undef. This can
// only happen in dead loops.
if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
+ if (AvailableVal->getType() != LI->getType())
+ AvailableVal = CastInst::Create(CastInst::BitCast, AvailableVal,
+ LI->getType(), "", LI);
LI->replaceAllUsesWith(AvailableVal);
LI->eraseFromParent();
return true;
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
- PN->addIncoming(I->second, I->first);
+ // If we have an available predecessor but it requires casting, insert the
+ // cast in the predecessor and use the cast.
+ Value *PredV = I->second;
+ if (PredV->getType() != LI->getType())
+ PredV = CastInst::Create(CastInst::BitCast, PredV, LI->getType(), "", P);
+
+ PN->addIncoming(PredV, I->first);
}
//cerr << "PRE: " << *LI << *PN << "\n";
%v = load i32* %p
ret i32 %v
}
+
+define i32 @test78(i1 %flag, i32* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; The stored zeros cannot be forwarded because of the intervening store to
+; %z (which may alias %x or %y), so the select must be rewritten by hoisting
+; one load per operand above it.
+; CHECK-LABEL: @test78(
+; CHECK: %[[V1:.*]] = load i32* %x
+; CHECK-NEXT: %[[V2:.*]] = load i32* %y
+; CHECK-NEXT: %[[S:.*]] = select i1 %flag, i32 %[[V1]], i32 %[[V2]]
+; CHECK-NEXT: ret i32 %[[S]]
+entry:
+  store i32 0, i32* %x
+  store i32 0, i32* %y
+  ; Block forwarding by storing to %z which could alias either %x or %y.
+  store i32 42, i32* %z
+  %p = select i1 %flag, i32* %x, i32* %y
+  %v = load i32* %p
+  ret i32 %v
+}
+
+define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; Same shape as the previous speculation test, but the select operands reach
+; the stores only through pointer bitcasts (%x1, %y1), exercising the
+; stripPointerCasts handling; the result loads are expected as float.
+; CHECK-LABEL: @test79(
+; CHECK: %[[V1:.*]] = load float* %x
+; CHECK-NEXT: %[[V2:.*]] = load float* %y
+; CHECK-NEXT: %[[S:.*]] = select i1 %flag, float %[[V1]], float %[[V2]]
+; CHECK-NEXT: ret float %[[S]]
+entry:
+  %x1 = bitcast float* %x to i32*
+  %y1 = bitcast i32* %y to float*
+  store i32 0, i32* %x1
+  store i32 0, i32* %y
+  ; Block forwarding by storing to %z which could alias either %x or %y.
+  store i32 42, i32* %z
+  %p = select i1 %flag, float* %x, float* %y1
+  %v = load float* %p
+  ret float %v
+}
+
+define i32 @test80(i1 %flag) {
+; Test that when we speculate the loads around the select they fold through
+; load->load folding and load->store folding.
+; Both select operands are allocas with known contents (%x via the earlier
+; load, %y via the store of that same value), so the speculated loads should
+; collapse to the single original load %tmp.
+; CHECK-LABEL: @test80(
+; CHECK: %[[X:.*]] = alloca i32
+; CHECK-NEXT: %[[Y:.*]] = alloca i32
+; CHECK: %[[V:.*]] = load i32* %[[X]]
+; CHECK-NEXT: store i32 %[[V]], i32* %[[Y]]
+; CHECK-NEXT: ret i32 %[[V]]
+entry:
+  %x = alloca i32
+  %y = alloca i32
+  call void @scribble_on_memory(i32* %x)
+  call void @scribble_on_memory(i32* %y)
+  %tmp = load i32* %x
+  store i32 %tmp, i32* %y
+  %p = select i1 %flag, i32* %x, i32* %y
+  %v = load i32* %p
+  ret i32 %v
+}
+
+define float @test81(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+; The select is over float* views of an i32-typed store chain; the expected
+; output forwards the i32 value and inserts a single i32->float bitcast of
+; the result rather than loading twice.
+; CHECK-LABEL: @test81(
+; CHECK: %[[X:.*]] = alloca i32
+; CHECK-NEXT: %[[Y:.*]] = alloca i32
+; CHECK: %[[V:.*]] = load i32* %[[X]]
+; CHECK-NEXT: store i32 %[[V]], i32* %[[Y]]
+; CHECK-NEXT: %[[C:.*]] = bitcast i32 %[[V]] to float
+; CHECK-NEXT: ret float %[[C]]
+entry:
+  %x = alloca float
+  %y = alloca i32
+  %x1 = bitcast float* %x to i32*
+  %y1 = bitcast i32* %y to float*
+  call void @scribble_on_memory(i32* %x1)
+  call void @scribble_on_memory(i32* %y)
+  %tmp = load i32* %x1
+  store i32 %tmp, i32* %y
+  %p = select i1 %flag, float* %x, float* %y1
+  %v = load float* %p
+  ret float %v
+}
+
+define i32 @test82(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+; Mirror image of the previous test: the underlying accesses are float-typed
+; and the select is over i32* views, so the expected result is the forwarded
+; float value with a single float->i32 bitcast at the end.
+; CHECK-LABEL: @test82(
+; CHECK: %[[X:.*]] = alloca float
+; CHECK-NEXT: %[[Y:.*]] = alloca i32
+; CHECK-NEXT: %[[X1:.*]] = bitcast float* %[[X]] to i32*
+; CHECK-NEXT: %[[Y1:.*]] = bitcast i32* %[[Y]] to float*
+; CHECK: %[[V:.*]] = load float* %[[X]]
+; CHECK-NEXT: store float %[[V]], float* %[[Y1]]
+; CHECK-NEXT: %[[C:.*]] = bitcast float %[[V]] to i32
+; CHECK-NEXT: ret i32 %[[C]]
+entry:
+  %x = alloca float
+  %y = alloca i32
+  %x1 = bitcast float* %x to i32*
+  %y1 = bitcast i32* %y to float*
+  call void @scribble_on_memory(i32* %x1)
+  call void @scribble_on_memory(i32* %y)
+  %tmp = load float* %x
+  store float %tmp, float* %y1
+  %p = select i1 %flag, i32* %x1, i32* %y
+  %v = load i32* %p
+  ret i32 %v
+}