CGBuilderTy &Bld = CGF.Builder;
return IsInSpmdExecutionMode
? getNVPTXNumThreads(CGF)
- : Bld.CreateSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
- "thread_limit");
+ : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF),
+ "thread_limit");
}
/// Get the thread id of the OMP master thread.
llvm::Value *NumThreads = getNVPTXNumThreads(CGF);
// We assume that the warp size is a power of 2.
- llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));
+ llvm::Value *Mask = Bld.CreateNUWSub(getNVPTXWarpSize(CGF), Bld.getInt32(1));
- return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)),
+ return Bld.CreateAnd(Bld.CreateNUWSub(NumThreads, Bld.getInt32(1)),
Bld.CreateNot(Mask), "master_tid");
}
// Step 1.2: Get the address for dest element:
// address = base + index * ElementSizeInChars.
- unsigned ElementSizeInChars =
- C.getTypeSizeInChars(Private->getType()).getQuantity();
+ llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
auto *CurrentOffset =
- Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars),
- ScratchpadIndex);
+ Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
auto *ScratchPadElemAbsolutePtrVal =
- Bld.CreateAdd(DestBase.getPointer(), CurrentOffset);
+ Bld.CreateNUWAdd(DestBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
DestElementAddr = Address(ScratchPadElemAbsolutePtrVal,
case ScratchpadToThread: {
// Step 1.1: Get the address for the src element in the scratchpad.
// address = base + index * ElementSizeInChars.
- unsigned ElementSizeInChars =
- C.getTypeSizeInChars(Private->getType()).getQuantity();
+ llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
auto *CurrentOffset =
- Bld.CreateMul(llvm::ConstantInt::get(CGM.SizeTy, ElementSizeInChars),
- ScratchpadIndex);
+ Bld.CreateNUWMul(ElementSizeInChars, ScratchpadIndex);
auto *ScratchPadElemAbsolutePtrVal =
- Bld.CreateAdd(SrcBase.getPointer(), CurrentOffset);
+ Bld.CreateNUWAdd(SrcBase.getPointer(), CurrentOffset);
ScratchPadElemAbsolutePtrVal =
Bld.CreateIntToPtr(ScratchPadElemAbsolutePtrVal, CGF.VoidPtrTy);
SrcElementAddr = Address(ScratchPadElemAbsolutePtrVal,
if ((IncrScratchpadDest || IncrScratchpadSrc) && (Idx + 1 < Size)) {
llvm::Value *ScratchpadBasePtr =
IncrScratchpadDest ? DestBase.getPointer() : SrcBase.getPointer();
- unsigned ElementSizeInChars =
- C.getTypeSizeInChars(Private->getType()).getQuantity();
- ScratchpadBasePtr = Bld.CreateAdd(
+ llvm::Value *ElementSizeInChars = CGF.getTypeSize(Private->getType());
+ ScratchpadBasePtr = Bld.CreateNUWAdd(
ScratchpadBasePtr,
- Bld.CreateMul(ScratchpadWidth, llvm::ConstantInt::get(
- CGM.SizeTy, ElementSizeInChars)));
+ Bld.CreateNUWMul(ScratchpadWidth, ElementSizeInChars));
// Take care of global memory alignment for performance
- ScratchpadBasePtr = Bld.CreateSub(ScratchpadBasePtr,
- llvm::ConstantInt::get(CGM.SizeTy, 1));
- ScratchpadBasePtr = Bld.CreateSDiv(
+ ScratchpadBasePtr = Bld.CreateNUWSub(
+ ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
+ ScratchpadBasePtr = Bld.CreateUDiv(
ScratchpadBasePtr,
llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
- ScratchpadBasePtr = Bld.CreateAdd(ScratchpadBasePtr,
- llvm::ConstantInt::get(CGM.SizeTy, 1));
- ScratchpadBasePtr = Bld.CreateMul(
+ ScratchpadBasePtr = Bld.CreateNUWAdd(
+ ScratchpadBasePtr, llvm::ConstantInt::get(CGM.SizeTy, 1));
+ ScratchpadBasePtr = Bld.CreateNUWMul(
ScratchpadBasePtr,
llvm::ConstantInt::get(CGM.SizeTy, GlobalMemoryAlignment));
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
-// CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+// CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
-// CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+// CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*),
// CHECK: call void @llvm.nvvm.barrier0()
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
-// CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+// CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
-// CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+// CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// CHECK-64: [[N:%.+]] = load i32, i32* [[REF_N]],
// CHECK-32: [[N:%.+]] = load i32, i32* [[LOCAL_N]],
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// CHECK: br label {{%?}}[[TERMINATE:.+]]
//
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// CHECK: load i16, i16* [[AA_CADDR]],
// CHECK: br label {{%?}}[[TERMINATE:.+]]
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
//
// Use captures.
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// CHECK-64-DAG: load i32, i32* [[REF_A]]
// CHECK-32-DAG: load i32, i32* [[LOCAL_A]]
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// Use captures.
// CHECK-DAG: getelementptr inbounds [[S1]], [[S1]]* [[REF_THIS]], i32 0, i32 0
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
//
// CHECK-64-DAG: load i32, i32* [[REF_A]]
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// printf in master-only basic block.
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
// printf in master-only basic block.
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
if (foo) {
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
//
// CHECK-NOT: kmpc_fork_teams
// CHECK-DAG: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK-DAG: [[NTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[WS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK-DAG: [[TH_LIMIT:%.+]] = sub i32 [[NTH]], [[WS]]
+ // CHECK-DAG: [[TH_LIMIT:%.+]] = sub nuw i32 [[NTH]], [[WS]]
// CHECK: [[IS_WORKER:%.+]] = icmp ult i32 [[TID]], [[TH_LIMIT]]
// CHECK: br i1 [[IS_WORKER]], label {{%?}}[[WORKER:.+]], label {{%?}}[[CHECK_MASTER:.+]]
//
// CHECK: [[MASTER]]
// CHECK-DAG: [[MNTH:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
- // CHECK: [[MTMP1:%.+]] = sub i32 [[MNTH]], [[MWS]]
+ // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]]
// CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]]
//
// CHECK-NOT: kmpc_fork_teams
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 8, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 8, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to double*
// CHECK: [[ELT_VAL:%.+]] = load double, double* [[ELT]], align
// CHECK: [[SHOULD_REDUCE:%.+]] = load i32, i32* {{.+}}, align
// CHECK: [[SCRATCHPAD:%.+]] = ptrtoint i8* [[SCRATCHPAD_PTR]] to i[[SZ]]
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 8, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 8, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 1, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 1, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[ELT_VAL:%.+]] = load i8, i8* [[ELT_VOID]], align
// CHECK: store i8 [[ELT_VAL]], i8* [[SCRATCHPAD_ELT_PTR]], align
//
- // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 1
- // CHECK: [[POS1:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[OF]]
- // CHECK: [[POS2:%.+]] = sub i[[SZ]] [[POS1]], 1
- // CHECK: [[POS3:%.+]] = sdiv i[[SZ]] [[POS2]], 256
- // CHECK: [[POS4:%.+]] = add i[[SZ]] [[POS3]], 1
- // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul i[[SZ]] [[POS4]], 256
+ // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 1
+ // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]]
+ // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1
+ // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256
+ // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1
+ // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256
//
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to float*
// CHECK: [[ELT_VAL:%.+]] = load float, float* [[ELT]], align
// CHECK: [[SHOULD_REDUCE:%.+]] = load i32, i32* {{.+}}, align
// CHECK: [[SCRATCHPAD:%.+]] = ptrtoint i8* [[SCRATCHPAD_PTR]] to i[[SZ]]
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 1, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 1, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: store i8 [[REMOTE_ELT_VAL]], i8* [[REMOTE_ELT1]], align
// CHECK: store i8* [[REMOTE_ELT1]], i8** [[REMOTE_ELT_REF]], align
//
- // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 1
- // CHECK: [[POS1:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[OF]]
- // CHECK: [[POS2:%.+]] = sub i[[SZ]] [[POS1]], 1
- // CHECK: [[POS3:%.+]] = sdiv i[[SZ]] [[POS2]], 256
- // CHECK: [[POS4:%.+]] = add i[[SZ]] [[POS3]], 1
- // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul i[[SZ]] [[POS4]], 256
+ // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 1
+ // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]]
+ // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1
+ // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256
+ // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1
+ // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i32*
// CHECK: [[ELT_VAL:%.+]] = load i32, i32* [[ELT]], align
// CHECK: [[SCRATCHPAD_ELT_PTR:%.+]] = bitcast i8* [[SCRATCHPAD_ELT_PTR_VOID]] to i32*
// CHECK: store i32 [[ELT_VAL]], i32* [[SCRATCHPAD_ELT_PTR]], align
//
- // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 4
- // CHECK: [[POS1:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[OF]]
- // CHECK: [[POS2:%.+]] = sub i[[SZ]] [[POS1]], 1
- // CHECK: [[POS3:%.+]] = sdiv i[[SZ]] [[POS2]], 256
- // CHECK: [[POS4:%.+]] = add i[[SZ]] [[POS3]], 1
- // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul i[[SZ]] [[POS4]], 256
+ // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 4
+ // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]]
+ // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1
+ // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256
+ // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1
+ // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256
//
// CHECK: [[ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[RED_LIST]], i[[SZ]] 0, i[[SZ]] 1
// CHECK: [[ELT_VOID:%.+]] = load i8*, i8** [[ELT_REF]],
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 2, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 2, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[ELT:%.+]] = bitcast i8* [[ELT_VOID]] to i16*
// CHECK: [[ELT_VAL:%.+]] = load i16, i16* [[ELT]], align
// CHECK: [[SHOULD_REDUCE:%.+]] = load i32, i32* {{.+}}, align
// CHECK: [[SCRATCHPAD:%.+]] = ptrtoint i8* [[SCRATCHPAD_PTR]] to i[[SZ]]
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 4, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 4, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 0
// CHECK: [[REMOTE_ELT1_PTR:%.+]] = bitcast i32* [[REMOTE_ELT1]] to i8*
// CHECK: store i8* [[REMOTE_ELT1_PTR]], i8** [[REMOTE_ELT_REF]], align
//
- // CHECK: [[OF:%.+]] = mul i[[SZ]] [[NUM_TEAMS]], 4
- // CHECK: [[POS1:%.+]] = add i[[SZ]] [[SCRATCHPAD]], [[OF]]
- // CHECK: [[POS2:%.+]] = sub i[[SZ]] [[POS1]], 1
- // CHECK: [[POS3:%.+]] = sdiv i[[SZ]] [[POS2]], 256
- // CHECK: [[POS4:%.+]] = add i[[SZ]] [[POS3]], 1
- // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul i[[SZ]] [[POS4]], 256
+ // CHECK: [[OF:%.+]] = mul nuw i[[SZ]] [[NUM_TEAMS]], 4
+ // CHECK: [[POS1:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD]], [[OF]]
+ // CHECK: [[POS2:%.+]] = sub nuw i[[SZ]] [[POS1]], 1
+ // CHECK: [[POS3:%.+]] = udiv i[[SZ]] [[POS2]], 256
+ // CHECK: [[POS4:%.+]] = add nuw i[[SZ]] [[POS3]], 1
+ // CHECK: [[SCRATCHPAD_NEXT:%.+]] = mul nuw i[[SZ]] [[POS4]], 256
//
- // CHECK: [[P:%.+]] = mul i[[SZ]] 2, [[TEAM]]
- // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
+ // CHECK: [[P:%.+]] = mul nuw i[[SZ]] 2, [[TEAM]]
+ // CHECK: [[SCRATCHPAD_ELT_PTR64:%.+]] = add nuw i[[SZ]] [[SCRATCHPAD_NEXT]], [[P]]
// CHECK: [[SCRATCHPAD_ELT_PTR_VOID:%.+]] = inttoptr i[[SZ]] [[SCRATCHPAD_ELT_PTR64]] to i8*
// CHECK: [[REMOTE_ELT_REF:%.+]] = getelementptr inbounds [[RLT]], [[RLT]]* [[REMOTE_RED_LIST]], i[[SZ]] 0, i[[SZ]] 1