if (!IsOffsetKnown)
return PI.setAborted(&LI);
- if (LI.isVolatile() &&
- LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
- return PI.setAborted(&LI);
-
if (isa<ScalableVectorType>(LI.getType()))
return PI.setAborted(&LI);
if (!IsOffsetKnown)
return PI.setAborted(&SI);
- if (SI.isVolatile() &&
- SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
- return PI.setAborted(&SI);
-
if (isa<ScalableVectorType>(ValOp->getType()))
return PI.setAborted(&SI);
if (!IsOffsetKnown)
return PI.setAborted(&II);
- // Don't replace this with a store with a different address space. TODO:
- // Use a store with the casted new alloca?
- if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace())
- return PI.setAborted(&II);
-
insertUse(II, Offset, Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue(),
(bool)Length);
if (!IsOffsetKnown)
return PI.setAborted(&II);
- // Don't replace this with a load/store with a different address space.
- // TODO: Use a store with the casted new alloca?
- if (II.isVolatile() &&
- (II.getDestAddressSpace() != DL.getAllocaAddrSpace() ||
- II.getSourceAddressSpace() != DL.getAllocaAddrSpace()))
- return PI.setAborted(&II);
-
// This side of the transfer is completely out-of-bounds, and so we can
// nuke the entire transfer. However, we also need to nuke the other side
// if already added to our partitions.
// Utility IR builder; the insertion point is set to point to the user.
IRBuilderTy IRB;
+ // Return the new alloca, addrspacecasted if required to avoid changing the
+ // addrspace of a volatile access.
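+ // For example, a volatile access that was performed through
+ // `addrspacecast ptr %alloca to ptr addrspace(1)` must still be performed
+ // in addrspace(1) after rewriting, so the cast is re-applied to the new
+ // alloca instead of aborting the transform.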
+ Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
+ if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace())
+ return &NewAI;
+
+ Type *AccessTy = NewAI.getAllocatedType()->getPointerTo(AddrSpace);
+ return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
+ }
+
public:
AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROAPass &Pass,
AllocaInst &OldAI, AllocaInst &NewAI,
(canConvertValue(DL, NewAllocaTy, TargetTy) ||
(IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
TargetTy->isIntegerTy()))) {
- LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
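+ // A volatile load must keep the address space of the original access,
+ // so load through a cast of the new alloca when the spaces differ.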
+ Value *NewPtr =
+ getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
+ LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
NewAI.getAlign(), LI.isVolatile(),
LI.getName());
if (AATags)
}
V = convertValue(DL, IRB, V, NewAllocaTy);
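+ // As with loads, a volatile store must keep the original access's
+ // address space.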
+ Value *NewPtr =
+ getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
+
NewSI =
- IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), SI.isVolatile());
+ IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
} else {
unsigned AS = SI.getPointerAddressSpace();
Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS));
V = convertValue(DL, IRB, V, AllocaTy);
}
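+ // A volatile memset rewritten to a plain store must likewise keep the
+ // original destination address space.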
+ Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
StoreInst *New =
- IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
+ IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
}
OtherPtrTy = OtherTy->getPointerTo(OtherAS);
- Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
+ Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
MaybeAlign SrcAlign = OtherAlign;
- Value *DstPtr = &NewAI;
MaybeAlign DstAlign = SliceAlign;
- if (!IsDest) {
- std::swap(SrcPtr, DstPtr);
+ if (!IsDest)
std::swap(SrcAlign, DstAlign);
+
+ Value *SrcPtr;
+ Value *DstPtr;
+
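+ // Only the alloca side of the transfer may need the addrspace-preserving
+ // cast; the other side keeps the adjusted pointer computed above.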
+ if (IsDest) {
+ DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
+ SrcPtr = AdjPtr;
+ } else {
+ DstPtr = AdjPtr;
+ SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
}
Value *Src;
bool Changed = false;
while (!DeadInsts.empty()) {
Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
- if (!I) continue;
+ if (!I)
+ continue;
LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
// If the instruction is an alloca, find the possible dbg.declare connected
define i64 @alloca_addrspacecast_bitcast_volatile_store(i64 %X) {
; CHECK-LABEL: @alloca_addrspacecast_bitcast_volatile_store(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca [8 x i8], align 1
-; CHECK-NEXT: [[A_CAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(1)
-; CHECK-NEXT: store volatile i64 [[X:%.*]], ptr addrspace(1) [[A_CAST]], align 4
-; CHECK-NEXT: [[Z:%.*]] = load i64, ptr addrspace(1) [[A_CAST]], align 4
-; CHECK-NEXT: ret i64 [[Z]]
+; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i64, align 8
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_SROA_0]] to ptr addrspace(1)
+; CHECK-NEXT: store volatile i64 [[X:%.*]], ptr addrspace(1) [[TMP0]], align 8
+; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_Z:%.*]] = load i64, ptr [[A_SROA_0]], align 8
+; CHECK-NEXT: ret i64 [[A_SROA_0_0_A_SROA_0_0_Z]]
;
entry:
%A = alloca [8 x i8]
ret i64 %Z
}
+%struct = type { [256 x i8], i32 }
+
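+; The volatile store must keep its original address space (addrspace(1)),
+; so the rewritten slice is addrspacecast back before the store.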
+define i65 @volatile_store_addrspacecast_slice(i65 %X, i16 %idx) {
+; CHECK-LABEL: @volatile_store_addrspacecast_slice(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [9 x i8], align 4
+; CHECK-NEXT: [[A_SROA_1:%.*]] = alloca [9 x i8], align 8
+; CHECK-NEXT: [[A_SROA_1_0_GEPB_SROA_CAST:%.*]] = addrspacecast ptr [[A_SROA_1]] to ptr addrspace(1)
+; CHECK-NEXT: store volatile i65 [[X:%.*]], ptr addrspace(1) [[A_SROA_1_0_GEPB_SROA_CAST]], align 8
+; CHECK-NEXT: br label [[L2:%.*]]
+; CHECK: L2:
+; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_20_Z:%.*]] = load i65, ptr [[A_SROA_0]], align 4
+; CHECK-NEXT: ret i65 [[A_SROA_0_0_A_SROA_0_20_Z]]
+;
+entry:
+ %A = alloca %struct
+ %B = addrspacecast ptr %A to ptr addrspace(1)
+ %gepA = getelementptr %struct, ptr %A, i32 0, i32 0, i16 20
+ %gepB = getelementptr i65, ptr addrspace(1) %B, i16 6
+ store volatile i65 %X, ptr addrspace(1) %gepB, align 1
+ br label %L2
+
+L2:
+ %Z = load i65, ptr %gepA, align 1
+ ret i65 %Z
+}
+
; Don't change the address space of a volatile operation
define i64 @alloca_addrspacecast_bitcast_volatile_load(i64 %X) {
; CHECK-LABEL: @alloca_addrspacecast_bitcast_volatile_load(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca [8 x i8], align 1
-; CHECK-NEXT: [[A_CAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(1)
-; CHECK-NEXT: store i64 [[X:%.*]], ptr addrspace(1) [[A_CAST]], align 4
-; CHECK-NEXT: [[Z:%.*]] = load volatile i64, ptr addrspace(1) [[A_CAST]], align 4
-; CHECK-NEXT: ret i64 [[Z]]
+; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i64, align 8
+; CHECK-NEXT: store i64 [[X:%.*]], ptr [[A_SROA_0]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_SROA_0]] to ptr addrspace(1)
+; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_Z:%.*]] = load volatile i64, ptr addrspace(1) [[TMP0]], align 8
+; CHECK-NEXT: ret i64 [[A_SROA_0_0_A_SROA_0_0_Z]]
;
entry:
%A = alloca [8 x i8]
declare void @llvm.memset.p1.i32(ptr addrspace(1) nocapture, i8, i32, i1) nounwind
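+; The volatile load must keep its original address space even after the
+; alloca is split into slices.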
+define i65 @volatile_load_addrspacecast_slice(i65 %X, i16 %idx) {
+; CHECK-LABEL: @volatile_load_addrspacecast_slice(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [9 x i8], align 4
+; CHECK-NEXT: [[A_SROA_1:%.*]] = alloca [9 x i8], align 8
+; CHECK-NEXT: [[A_SROA_1_0_GEPB_SROA_CAST:%.*]] = addrspacecast ptr [[A_SROA_1]] to ptr addrspace(1)
+; CHECK-NEXT: store i65 [[X:%.*]], ptr addrspace(1) [[A_SROA_1_0_GEPB_SROA_CAST]], align 8
+; CHECK-NEXT: br label [[L2:%.*]]
+; CHECK: L2:
+; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_20_Z:%.*]] = load volatile i65, ptr [[A_SROA_0]], align 4
+; CHECK-NEXT: ret i65 [[A_SROA_0_0_A_SROA_0_20_Z]]
+;
+entry:
+ %A = alloca %struct
+ %B = addrspacecast ptr %A to ptr addrspace(1)
+ %gepA = getelementptr %struct, ptr %A, i32 0, i32 0, i16 20
+ %gepB = getelementptr i65, ptr addrspace(1) %B, i16 6
+ store i65 %X, ptr addrspace(1) %gepB, align 1
+ br label %L2
+
+L2:
+ %Z = load volatile i65, ptr %gepA, align 1
+ ret i65 %Z
+}
+
; Don't change the address space of a volatile operation
define i32 @volatile_memset() {
; CHECK-LABEL: @volatile_memset(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca [4 x i8], align 1
-; CHECK-NEXT: [[ASC:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memset.p1.i32(ptr addrspace(1) [[ASC]], i8 42, i32 4, i1 true)
-; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT: ret i32 [[VAL]]
+; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_SROA_0]] to ptr addrspace(1)
+; CHECK-NEXT: store volatile i32 707406378, ptr addrspace(1) [[TMP0]], align 4
+; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_VAL:%.*]] = load i32, ptr [[A_SROA_0]], align 4
+; CHECK-NEXT: ret i32 [[A_SROA_0_0_A_SROA_0_0_VAL]]
;
entry:
%a = alloca [4 x i8]
define void @volatile_memcpy(ptr %src, ptr %dst) {
; CHECK-LABEL: @volatile_memcpy(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca [4 x i8], align 1
-; CHECK-NEXT: [[ASC:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(1)
-; CHECK-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) [[ASC]], ptr [[SRC:%.*]], i32 4, i1 true), !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr [[DST:%.*]], ptr addrspace(1) [[ASC]], i32 4, i1 true), !tbaa [[TBAA3:![0-9]+]]
+; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[A_SROA_0]] to ptr addrspace(1)
+; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[TMP0]], align 4, !tbaa [[TBAA0]]
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[A_SROA_0]] to ptr addrspace(1)
+; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1:%.*]] = load volatile i32, ptr addrspace(1) [[TMP1]], align 4, !tbaa [[TBAA3:![0-9]+]]
+; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA3]]
; CHECK-NEXT: ret void
;
entry: