static const unsigned MaxDepth = 3;
bool isConsecutiveAccess(Value *A, Value *B);
- bool areConsecutivePointers(Value *PtrA, Value *PtrB, const APInt &PtrDelta,
+ bool areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta,
unsigned Depth = 0) const;
bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta,
unsigned Depth) const;
}
bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB,
- const APInt &PtrDelta,
- unsigned Depth) const {
+ APInt PtrDelta, unsigned Depth) const {
unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType());
APInt OffsetA(PtrBitWidth, 0);
APInt OffsetB(PtrBitWidth, 0);
PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
- if (DL.getTypeStoreSizeInBits(PtrA->getType()) != PtrBitWidth ||
- DL.getTypeStoreSizeInBits(PtrB->getType()) != PtrBitWidth)
+ unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
+
+ if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
return false;
+ // If we have to shrink the pointer, stripAndAccumulateInBoundsConstantOffsets
+ // should have properly handled any possible overflow, and the values should
+ // fit into the smallest data type used in the cast/gep chain.
+ assert(OffsetA.getMinSignedBits() <= NewPtrBitWidth &&
+        OffsetB.getMinSignedBits() <= NewPtrBitWidth &&
+        "accumulated offsets do not fit the stripped pointer width");
+
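+ // Resize the accumulated offsets and the requested delta to the width of
+ // the stripped pointers, so all three are compared at a single bit width.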
+ OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
+ OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
+ PtrDelta = PtrDelta.sextOrTrunc(NewPtrBitWidth);
+
APInt OffsetDelta = OffsetB - OffsetA;
  // Check if they are based on the same pointer. That makes the offsets
  // sufficient.
ret void
}
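+
+; The 32-bit addrspace(5) pointer is extended to the 64-bit generic space;
+; the offsets accumulated at 64 bits must be truncated back to the width of
+; the underlying 32-bit pointer.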
+; CHECK-LABEL: @ext_ptr
+; CHECK: load <2 x i32>
+define void @ext_ptr(i32 addrspace(5)* %p) {
+entry:
+ %gep1 = getelementptr inbounds i32, i32 addrspace(5)* %p, i64 0
+ %gep2 = getelementptr inbounds i32, i32 addrspace(5)* %p, i64 1
+ %a.ascast = addrspacecast i32 addrspace(5)* %gep1 to i32*
+ %b.ascast = addrspacecast i32 addrspace(5)* %gep2 to i32*
+ %tmp1 = load i32, i32* %a.ascast, align 8
+ %tmp2 = load i32, i32* %b.ascast, align 8
+ unreachable
+}
+
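+; The 64-bit generic pointer is cast down to the 32-bit addrspace(5) space;
+; the offsets accumulated at 32 bits must be sign-extended to the width of
+; the underlying 64-bit pointer.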
+; CHECK-LABEL: @shrink_ptr
+; CHECK: load <2 x i32>
+define void @shrink_ptr(i32* %p) {
+entry:
+ %gep1 = getelementptr inbounds i32, i32* %p, i64 0
+ %gep2 = getelementptr inbounds i32, i32* %p, i64 1
+ %a.ascast = addrspacecast i32* %gep1 to i32 addrspace(5)*
+ %b.ascast = addrspacecast i32* %gep2 to i32 addrspace(5)*
+ %tmp1 = load i32, i32 addrspace(5)* %a.ascast, align 8
+ %tmp2 = load i32, i32 addrspace(5)* %b.ascast, align 8
+ unreachable
+}
+
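+; The i64 index 4294967295 wraps to -1 once truncated to the 32-bit address
+; space, so the two loads are still adjacent and can be vectorized.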
+; CHECK-LABEL: @ext_ptr_wrap
+; CHECK: load <2 x i8>
+define void @ext_ptr_wrap(i8 addrspace(5)* %p) {
+entry:
+ %gep1 = getelementptr inbounds i8, i8 addrspace(5)* %p, i64 0
+ %gep2 = getelementptr inbounds i8, i8 addrspace(5)* %p, i64 4294967295
+ %a.ascast = addrspacecast i8 addrspace(5)* %gep1 to i8*
+ %b.ascast = addrspacecast i8 addrspace(5)* %gep2 to i8*
+ %tmp1 = load i8, i8* %a.ascast, align 1
+ %tmp2 = load i8, i8* %b.ascast, align 1
+ unreachable
+}
+
!0 = !{}