ProgInfo.FlatUsed = Info.UsesFlatScratch;
ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion;
- if (!isUInt<32>(ProgInfo.ScratchSize)) {
+ const uint64_t MaxScratchPerWorkitem =
+ GCNSubtarget::MaxWaveScratchSize / STM.getWavefrontSize();
+ if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
ProgInfo.ScratchSize, DS_Error);
MF.getFunction().getContext().diagnose(DiagStackSize);
declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture, i8, i32, i32, i1) #1
-; ERROR: error: stack size limit exceeded (4294967296) in stack_size_limit
-; GCN: ; ScratchSize: 4294967296
-define amdgpu_kernel void @stack_size_limit() #0 {
+; ERROR: error: stack size limit exceeded (131061) in stack_size_limit_wave64
+; GCN: ; ScratchSize: 131061
+define amdgpu_kernel void @stack_size_limit_wave64() #0 {
entry:
- %alloca = alloca [1073741823 x i32], align 4, addrspace(5)
- %bc = bitcast [1073741823 x i32] addrspace(5)* %alloca to i8 addrspace(5)*
- call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %bc, i8 9, i32 1073741823, i32 1, i1 true)
+ %alloca = alloca [131057 x i8], align 1, addrspace(5)
+ %alloca.bc = bitcast [131057 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
+ call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 131057, i32 1, i1 true)
ret void
}
+
+; ERROR: error: stack size limit exceeded (262117) in stack_size_limit_wave32
+; GCN: ; ScratchSize: 262117
+define amdgpu_kernel void @stack_size_limit_wave32() #1 {
+entry:
+ %alloca = alloca [262113 x i8], align 1, addrspace(5)
+ %alloca.bc = bitcast [262113 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
+ call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 262113, i32 1, i1 true)
+ ret void
+}
+
+; ERROR-NOT: error:
+; GCN: ; ScratchSize: 131056
+define amdgpu_kernel void @max_stack_size_wave64() #0 {
+entry:
+ %alloca = alloca [131052 x i8], align 1, addrspace(5)
+ %alloca.bc = bitcast [131052 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
+ call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 131052, i32 1, i1 true)
+ ret void
+}
+
+; ERROR-NOT: error:
+; GCN: ; ScratchSize: 262112
+define amdgpu_kernel void @max_stack_size_wave32() #1 {
+entry:
+ %alloca = alloca [262108 x i8], align 1, addrspace(5)
+ %alloca.bc = bitcast [262108 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
+ call void @llvm.memset.p5i8.i32(i8 addrspace(5)* %alloca.bc, i8 9, i32 262108, i32 1, i1 true)
+ ret void
+}
+
+attributes #0 = { "target-cpu" = "gfx900" }
+attributes #1 = { "target-cpu" = "gfx1010" }