-; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s
-; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s
-
-; GCN-LABEL: {{^}}gfx_callable_amdpal:
-; GCN: .amdgpu_pal_metadata
-; GCN-NEXT: ---
-; GCN-NEXT: amdpal.pipelines:
+; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG -enable-var-scope %s
+; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL -enable-var-scope %s
+
+declare float @extern_func(float) #0
+declare float @extern_func_many_args(<64 x float>) #0
+
+@funcptr = external hidden unnamed_addr addrspace(4) constant void()*, align 4
+
+define amdgpu_gfx float @no_stack(float %arg0) #0 {
+ %add = fadd float %arg0, 1.0
+ ret float %add
+}
+
+define amdgpu_gfx float @simple_stack(float %arg0) #0 {
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %add = fadd float %arg0, %val
+ ret float %add
+}
+
+define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %add = fadd float %arg0, %val
+ %stack2 = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack2
+ %val2 = load volatile float, float addrspace(5)* %stack2
+ %add2 = fadd float %add, %val2
+ ret float %add2
+}
+
+define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
+bb0:
+ %cmp = fcmp ogt float %arg0, 0.0
+ br i1 %cmp, label %bb1, label %bb2
+
+bb1:
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %add = fadd float %arg0, %val
+ br label %bb2
+
+bb2:
+ %res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ]
+ ret float %res
+}
+
+define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
+bb0:
+ br label %bb1
+
+bb1:
+ %ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ]
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %add = fadd float %arg0, %val
+ %cmp = icmp sgt i32 %ctr, 0
+ %newctr = sub i32 %ctr, 1
+ br i1 %cmp, label %bb1, label %bb2
+
+bb2:
+ ret float %add
+}
+
+define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
+ %res = call amdgpu_gfx float @simple_stack(float %arg0)
+ ret float %res
+}
+
+define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %res = call amdgpu_gfx float @simple_stack(float %arg0)
+ %add = fadd float %res, %val
+ ret float %add
+}
+
+define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
+ %res = call amdgpu_gfx float @extern_func(float %arg0)
+ ret float %res
+}
+
+define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %res = call amdgpu_gfx float @extern_func(float %arg0)
+ %add = fadd float %res, %val
+ ret float %add
+}
+
+define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
+ %res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
+ ret float %res
+}
+
+define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
+ %fptr = load void()*, void()* addrspace(4)* @funcptr
+ call amdgpu_gfx void %fptr()
+ ret float %arg0
+}
+
+define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %fptr = load void()*, void()* addrspace(4)* @funcptr
+ call amdgpu_gfx void %fptr()
+ %add = fadd float %arg0, %val
+ ret float %add
+}
+
+define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
+ %stack = alloca float, i32 4, align 4, addrspace(5)
+ store volatile float 2.0, float addrspace(5)* %stack
+ %val = load volatile float, float addrspace(5)* %stack
+ %res = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
+ %add = fadd float %res, %val
+ ret float %add
+}
+
+attributes #0 = { nounwind }
+
+; GCN: amdpal.pipelines:
; GCN-NEXT: - .registers: {}
+; GCN-NEXT: .shader_functions:
+; GCN-NEXT: dynamic_stack:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT: dynamic_stack_loop:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT: multiple_stack:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
+; GCN-NEXT: no_stack:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
+; GCN-NEXT: no_stack_call:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
+; GCN-NEXT: no_stack_extern_call:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT: no_stack_extern_call_many_args:
+; SDAG-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
+; GISEL-NEXT: .stack_frame_size_in_bytes: 0xd0{{$}}
+; GCN-NEXT: no_stack_indirect_call:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
+; GCN-NEXT: simple_stack:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
+; GCN-NEXT: simple_stack_call:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT: simple_stack_extern_call:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT: simple_stack_indirect_call:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
+; GCN-NEXT: simple_stack_recurse:
+; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: ...
-; GCN-NEXT: .end_amdgpu_pal_metadata
-define amdgpu_gfx half @gfx_callable_amdpal(half %arg0) {
- %add = fadd half %arg0, 1.0
- ret half %add
-}