Change cast_lds_gv into a kernel function so that
its LDS usage is lowered appropriately. LDS lowering
currently does not happen for orphan device functions.
; CHECK-LABEL: {{^}}cast_lds_gv:
; CHECK: s_mov_b64 s[{{[0-9]+}}:[[HIREG:[0-9]+]]], src_shared_base
+; CHECK: v_mov_b32_e32 v0, 0
; CHECK: v_mov_b32_e32 v1, s[[HIREG]]
; CHECK-NOT: v0
; CHECK-NOT: v1
-define void @cast_lds_gv() {
+define amdgpu_kernel void @cast_lds_gv() {
%cast = addrspacecast ptr addrspace(3) @lds to ptr
call void @flat_user(ptr %cast)
ret void