; GFX9-LABEL: test_call_external_void_func_i1_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1@rel32@hi+12
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i1_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 1
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_i1(i1 true)
ret void
}
; GFX9-LABEL: test_call_external_void_func_i1_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_signext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_signext@rel32@hi+12
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i1_signext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_signext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_signext@rel32@hi+12
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_signext:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0
; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%var = load volatile i1, i1 addrspace(1)* undef
call amdgpu_gfx void @external_void_func_i1_signext(i1 signext%var)
ret void
; GFX9-LABEL: test_call_external_void_func_i1_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_zeroext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_zeroext@rel32@hi+12
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i1_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_zeroext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_zeroext@rel32@hi+12
; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_zeroext:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0
; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%var = load volatile i1, i1 addrspace(1)* undef
call amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext %var)
ret void
; GFX9-LABEL: test_call_external_void_func_i8_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i8_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_i8(i8 123)
ret void
}
; GFX9-LABEL: test_call_external_void_func_i8_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_signext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_signext@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i8_signext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_sbyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_signext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_signext@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_signext:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%var = load volatile i8, i8 addrspace(1)* undef
call amdgpu_gfx void @external_void_func_i8_signext(i8 signext %var)
ret void
; GFX9-LABEL: test_call_external_void_func_i8_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_zeroext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_zeroext@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i8_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_zeroext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_zeroext@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_zeroext:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%var = load volatile i8, i8 addrspace(1)* undef
call amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext %var)
ret void
; GFX9-LABEL: test_call_external_void_func_i16_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i16_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_i16(i16 123)
ret void
}
; GFX9-LABEL: test_call_external_void_func_i16_signext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_signext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_signext@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i16_signext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_signext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_signext@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_signext:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%var = load volatile i16, i16 addrspace(1)* undef
call amdgpu_gfx void @external_void_func_i16_signext(i16 signext %var)
ret void
; GFX9-LABEL: test_call_external_void_func_i16_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_zeroext@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_zeroext@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i16_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_zeroext@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_zeroext@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_zeroext:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%var = load volatile i16, i16 addrspace(1)* undef
call amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext %var)
ret void
; GFX9-LABEL: test_call_external_void_func_i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 42
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 42
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_i32(i32 42)
ret void
}
; GFX9-LABEL: test_call_external_void_func_i64_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i64@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_i64(i64 123)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v2i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x i64>, <2 x i64> addrspace(1)* null
call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v2, 3
; GFX9-NEXT: v_mov_b32_e32 v3, 4
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2i64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 1
; GFX10-NEXT: v_mov_b32_e32 v1, 2
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v3i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v4, 1
; GFX9-NEXT: v_mov_b32_e32 v5, 2
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%load = load <2 x i64>, <2 x i64> addrspace(1)* null
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
; GFX9-LABEL: test_call_external_void_func_v4i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v6, 3
; GFX9-NEXT: v_mov_b32_e32 v7, 4
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v4i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v7, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%load = load <2 x i64>, <2 x i64> addrspace(1)* null
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call amdgpu_gfx void @external_void_func_v4i64(<4 x i64> %val)
; GFX9-LABEL: test_call_external_void_func_f16_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_f16_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f16@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_f16(half 4.0)
ret void
}
; GFX9-LABEL: test_call_external_void_func_f32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 4.0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_f32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 4.0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_f32(float 4.0)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v0, 1.0
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2f32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: v_mov_b32_e32 v2, 4.0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3f32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v3, -1.0
; GFX9-NEXT: v_mov_b32_e32 v4, 0.5
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v5f32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 1.0
; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
; GFX10-NEXT: v_mov_b32_e32 v4, 0.5
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_f64_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_f64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0x40100000
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f64@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_f64(double 4.0)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2f64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f64@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3f64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 2.0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f64@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x i16>, <2 x i16> addrspace(1)* undef
call amdgpu_gfx void @external_void_func_v2i16(<2 x i16> %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v3i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <3 x i16>, <3 x i16> addrspace(1)* undef
call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v3f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <3 x half>, <3 x half> addrspace(1)* undef
call amdgpu_gfx void @external_void_func_v3f16(<3 x half> %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX9-NEXT: v_mov_b32_e32 v1, 3
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3i16_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX10-NEXT: v_mov_b32_e32 v1, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00
; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3f16_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 0x40003c00
; GFX10-NEXT: v_mov_b32_e32 v1, 0x4400
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v4i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <4 x i16>, <4 x i16> addrspace(1)* undef
call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v4i16_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001
; GFX10-NEXT: v_mov_b32_e32 v1, 0x40003
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f16@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_dword v0, v[0:1], off
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f16@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x half>, <2 x half> addrspace(1)* undef
call amdgpu_gfx void @external_void_func_v2f16(<2 x half> %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v2i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x i32>, <2 x i32> addrspace(1)* undef
call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_mov_b32_e32 v1, 2
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 1
; GFX10-NEXT: v_mov_b32_e32 v1, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v1, 4
; GFX9-NEXT: v_mov_b32_e32 v2, 5
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 3
; GFX10-NEXT: v_mov_b32_e32 v1, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v2, 5
; GFX9-NEXT: v_mov_b32_e32 v3, 6
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3i32_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 3
; GFX10-NEXT: v_mov_b32_e32 v1, 4
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v4i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v4i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <4 x i32>, <4 x i32> addrspace(1)* undef
call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v2, 3
; GFX9-NEXT: v_mov_b32_e32 v3, 4
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v4i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 1
; GFX10-NEXT: v_mov_b32_e32 v1, 2
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v3, 4
; GFX9-NEXT: v_mov_b32_e32 v4, 5
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v5i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 1
; GFX10-NEXT: v_mov_b32_e32 v1, 2
; GFX10-NEXT: v_mov_b32_e32 v4, 5
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v8i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v8, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[4:5]
-; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[4:5] offset:16
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[30:31]
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[30:31] offset:16
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v8i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v8, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: v_mov_b32_e32 v8, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[4:5]
-; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[4:5] offset:16
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[30:31]
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[30:31] offset:16
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
%val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> %val)
; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v6, 7
; GFX9-NEXT: v_mov_b32_e32 v7, 8
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v8i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 1
; GFX10-NEXT: v_mov_b32_e32 v1, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v16i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_mov_b32_e32 v16, 0
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v16, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[4:5]
-; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[4:5] offset:16
-; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[4:5] offset:32
-; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[4:5] offset:48
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[30:31]
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[30:31] offset:16
+; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[30:31] offset:32
+; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[30:31] offset:48
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v16, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: v_mov_b32_e32 v16, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x3
-; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[4:5]
-; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[4:5] offset:16
-; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[4:5] offset:32
-; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[4:5] offset:48
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x3
+; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[30:31]
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[30:31] offset:16
+; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[30:31] offset:32
+; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[30:31] offset:48
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
%val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
call amdgpu_gfx void @external_void_func_v16i32(<16 x i32> %val)
; GFX9-LABEL: test_call_external_void_func_v32i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_mov_b32_e32 v28, 0
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v28, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[4:5]
-; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[4:5] offset:16
-; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[4:5] offset:32
-; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[4:5] offset:48
-; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[4:5] offset:64
-; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[4:5] offset:80
-; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[4:5] offset:96
+; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[30:31]
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[30:31] offset:16
+; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[30:31] offset:32
+; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[30:31] offset:48
+; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[30:31] offset:64
+; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[30:31] offset:80
+; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[30:31] offset:96
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[4:5] offset:112
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[30:31] offset:112
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v32i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v32, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: v_mov_b32_e32 v32, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x7
-; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[4:5]
-; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[4:5] offset:16
-; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[4:5] offset:32
-; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[4:5] offset:48
-; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[4:5] offset:64
-; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[4:5] offset:80
-; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[4:5] offset:96
-; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[4:5] offset:112
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v32i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x7
+; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[30:31]
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[30:31] offset:16
+; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[30:31] offset:32
+; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[30:31] offset:48
+; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[30:31] offset:64
+; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[30:31] offset:80
+; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[30:31] offset:96
+; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[30:31] offset:112
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
%val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
call amdgpu_gfx void @external_void_func_v32i32(<32 x i32> %val)
; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_mov_b32_e32 v28, 0
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v28, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[4:5]
-; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[4:5] offset:16
-; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[4:5] offset:32
-; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[4:5] offset:48
-; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[4:5] offset:64
-; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[4:5] offset:80
-; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[4:5] offset:96
+; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[30:31]
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[30:31] offset:16
+; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[30:31] offset:32
+; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[30:31] offset:48
+; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[30:31] offset:64
+; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[30:31] offset:80
+; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[30:31] offset:96
; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[4:5] offset:112
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
+; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[30:31] offset:112
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32@rel32@hi+12
; GFX9-NEXT: s_waitcnt vmcnt(7)
; GFX9-NEXT: global_load_dword v32, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v32i32_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v32, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: v_mov_b32_e32 v32, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
; GFX10-NEXT: global_load_dword v33, v[0:1], off
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x7
-; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[4:5]
-; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[4:5] offset:16
-; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[4:5] offset:32
-; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[4:5] offset:48
-; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[4:5] offset:64
-; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[4:5] offset:80
-; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[4:5] offset:96
-; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[4:5] offset:112
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[30:31]
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[30:31] offset:16
+; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[30:31] offset:32
+; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[30:31] offset:48
+; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[30:31] offset:64
+; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[30:31] offset:80
+; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[30:31] offset:96
+; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[30:31] offset:112
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32@rel32@hi+12
; GFX10-NEXT: s_waitcnt vmcnt(8)
; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(8)
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v33, s32
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
%val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
%val1 = load i32, i32 addrspace(1)* undef
; GFX9-LABEL: test_call_external_i32_func_i32_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v0, 42
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: v_mov_b32_e32 v42, v1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_i32_func_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_i32_func_i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
; GFX9-NEXT: global_store_dword v[41:42], v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_i32_func_i32_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: v_mov_b32_e32 v0, 42
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_mov_b32_e32 v42, v1
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_i32_func_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_i32_func_i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
; GFX10-NEXT: global_store_dword v[41:42], v0, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_i32_func_i32_imm:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_clause 0x1
; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = call amdgpu_gfx i32 @external_i32_func_i32(i32 42)
store volatile i32 %val, i32 addrspace(1)* %out
ret void
; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dword v1, v2, s[4:5] offset:4
-; GFX9-NEXT: global_load_ubyte v0, v2, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: global_load_dword v1, v2, s[30:31] offset:4
+; GFX9-NEXT: global_load_ubyte v0, v2, s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_struct_i8_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_struct_i8_i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_struct_i8_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: global_load_ubyte v0, v2, s[4:5]
-; GFX10-NEXT: global_load_dword v1, v2, s[4:5] offset:4
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_ubyte v0, v2, s[30:31]
+; GFX10-NEXT: global_load_dword v1, v2, s[30:31] offset:4
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_struct_i8_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_struct_i8_i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_struct_i8_i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
%val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
call amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 } %val)
; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: v_mov_b32_e32 v0, 3
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_byval_struct_i8_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_byval_struct_i8_i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 3
; GFX10-NEXT: v_mov_b32_e32 v1, 8
; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33
; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4
; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_byval_struct_i8_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_byval_struct_i8_i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_byval_struct_i8_i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = alloca { i8, i32 }, align 4, addrspace(5)
%gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0
%gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1
; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: v_mov_b32_e32 v0, 3
; GFX9-NEXT: v_add_u32_e32 v0, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8
; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xf800
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v[0:1], v1, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_mov_b32_e32 v0, 3
; GFX10-NEXT: v_mov_b32_e32 v1, 8
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8
; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfc00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v[0:1], v1, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: s_clause 0x1
; GFX10-SCRATCH-NEXT: scratch_load_ubyte v0, off, s33 offset:8
; GFX10-SCRATCH-NEXT: scratch_load_dword v1, off, s33 offset:12
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-SCRATCH-NEXT: global_store_dword v[0:1], v1, off
; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:16 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%in.val = alloca { i8, i32 }, align 4, addrspace(5)
%out.val = alloca { i8, i32 }, align 4, addrspace(5)
%in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0
; GFX9-LABEL: test_call_external_void_func_v16i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
+; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i8@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i8@rel32@hi+12
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0
; GFX9-NEXT: v_mov_b32_e32 v1, v16
; GFX9-NEXT: v_mov_b32_e32 v2, v17
; GFX9-NEXT: v_mov_b32_e32 v3, v18
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v16i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5]
+; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[30:31]
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i8@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i8@rel32@hi+12
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshrrev_b32_e32 v16, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v0
; GFX10-NEXT: v_mov_b32_e32 v1, v16
; GFX10-NEXT: v_mov_b32_e32 v2, v17
; GFX10-NEXT: v_mov_b32_e32 v3, v18
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i8:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v17
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v18
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
%val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
call amdgpu_gfx void @external_void_func_v16i8(<16 x i8> %val)
; GFX9-LABEL: tail_call_byval_align16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12
-; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32
+; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 30
+; GFX9-NEXT: s_mov_b32 s33, s32
+; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:8
+; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:12
+; GFX9-NEXT: v_writelane_b32 v40, s34, 0
+; GFX9-NEXT: v_writelane_b32 v40, s35, 1
+; GFX9-NEXT: v_writelane_b32 v40, s36, 2
+; GFX9-NEXT: v_writelane_b32 v40, s37, 3
+; GFX9-NEXT: v_writelane_b32 v40, s38, 4
+; GFX9-NEXT: v_writelane_b32 v40, s39, 5
+; GFX9-NEXT: v_writelane_b32 v40, s40, 6
+; GFX9-NEXT: v_writelane_b32 v40, s41, 7
+; GFX9-NEXT: v_writelane_b32 v40, s42, 8
+; GFX9-NEXT: v_writelane_b32 v40, s43, 9
+; GFX9-NEXT: v_writelane_b32 v40, s44, 10
+; GFX9-NEXT: v_writelane_b32 v40, s45, 11
+; GFX9-NEXT: v_writelane_b32 v40, s46, 12
+; GFX9-NEXT: v_writelane_b32 v40, s47, 13
+; GFX9-NEXT: v_writelane_b32 v40, s48, 14
+; GFX9-NEXT: v_writelane_b32 v40, s49, 15
+; GFX9-NEXT: v_writelane_b32 v40, s50, 16
+; GFX9-NEXT: v_writelane_b32 v40, s51, 17
+; GFX9-NEXT: v_writelane_b32 v40, s52, 18
+; GFX9-NEXT: v_writelane_b32 v40, s53, 19
+; GFX9-NEXT: v_writelane_b32 v40, s54, 20
+; GFX9-NEXT: v_writelane_b32 v40, s55, 21
+; GFX9-NEXT: v_writelane_b32 v40, s56, 22
+; GFX9-NEXT: v_writelane_b32 v40, s57, 23
+; GFX9-NEXT: v_writelane_b32 v40, s58, 24
+; GFX9-NEXT: v_writelane_b32 v40, s59, 25
+; GFX9-NEXT: v_writelane_b32 v40, s60, 26
+; GFX9-NEXT: v_writelane_b32 v40, s61, 27
+; GFX9-NEXT: s_addk_i32 s32, 0x800
+; GFX9-NEXT: v_writelane_b32 v40, s62, 28
+; GFX9-NEXT: v_writelane_b32 v40, s63, 29
+; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[6:7]
+; GFX9-NEXT: s_add_u32 s6, s6, byval_align16_f64_arg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s7, s7, byval_align16_f64_arg@rel32@hi+12
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
+; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: v_readlane_b32 s63, v40, 29
+; GFX9-NEXT: v_readlane_b32 s62, v40, 28
+; GFX9-NEXT: v_readlane_b32 s61, v40, 27
+; GFX9-NEXT: v_readlane_b32 s60, v40, 26
+; GFX9-NEXT: v_readlane_b32 s59, v40, 25
+; GFX9-NEXT: v_readlane_b32 s58, v40, 24
+; GFX9-NEXT: v_readlane_b32 s57, v40, 23
+; GFX9-NEXT: v_readlane_b32 s56, v40, 22
+; GFX9-NEXT: v_readlane_b32 s55, v40, 21
+; GFX9-NEXT: v_readlane_b32 s54, v40, 20
+; GFX9-NEXT: v_readlane_b32 s53, v40, 19
+; GFX9-NEXT: v_readlane_b32 s52, v40, 18
+; GFX9-NEXT: v_readlane_b32 s51, v40, 17
+; GFX9-NEXT: v_readlane_b32 s50, v40, 16
+; GFX9-NEXT: v_readlane_b32 s49, v40, 15
+; GFX9-NEXT: v_readlane_b32 s48, v40, 14
+; GFX9-NEXT: v_readlane_b32 s47, v40, 13
+; GFX9-NEXT: v_readlane_b32 s46, v40, 12
+; GFX9-NEXT: v_readlane_b32 s45, v40, 11
+; GFX9-NEXT: v_readlane_b32 s44, v40, 10
+; GFX9-NEXT: v_readlane_b32 s43, v40, 9
+; GFX9-NEXT: v_readlane_b32 s42, v40, 8
+; GFX9-NEXT: v_readlane_b32 s41, v40, 7
+; GFX9-NEXT: v_readlane_b32 s40, v40, 6
+; GFX9-NEXT: v_readlane_b32 s39, v40, 5
+; GFX9-NEXT: v_readlane_b32 s38, v40, 4
+; GFX9-NEXT: v_readlane_b32 s37, v40, 3
+; GFX9-NEXT: v_readlane_b32 s36, v40, 2
+; GFX9-NEXT: v_readlane_b32 s35, v40, 1
+; GFX9-NEXT: v_readlane_b32 s34, v40, 0
+; GFX9-NEXT: s_addk_i32 s32, 0xf800
+; GFX9-NEXT: v_readlane_b32 s33, v40, 30
+; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[4:5]
;
; GFX10-LABEL: tail_call_byval_align16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: v_writelane_b32 v40, s33, 30
+; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12
-; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
+; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:12
+; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:8
+; GFX10-NEXT: s_addk_i32 s32, 0x400
+; GFX10-NEXT: v_writelane_b32 v40, s34, 0
+; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31]
+; GFX10-NEXT: s_getpc_b64 s[6:7]
+; GFX10-NEXT: s_add_u32 s6, s6, byval_align16_f64_arg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s7, s7, byval_align16_f64_arg@rel32@hi+12
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32
+; GFX10-NEXT: v_writelane_b32 v40, s35, 1
+; GFX10-NEXT: v_writelane_b32 v40, s36, 2
+; GFX10-NEXT: v_writelane_b32 v40, s37, 3
+; GFX10-NEXT: v_writelane_b32 v40, s38, 4
+; GFX10-NEXT: v_writelane_b32 v40, s39, 5
+; GFX10-NEXT: v_writelane_b32 v40, s40, 6
+; GFX10-NEXT: v_writelane_b32 v40, s41, 7
+; GFX10-NEXT: v_writelane_b32 v40, s42, 8
+; GFX10-NEXT: v_writelane_b32 v40, s43, 9
+; GFX10-NEXT: v_writelane_b32 v40, s44, 10
+; GFX10-NEXT: v_writelane_b32 v40, s45, 11
+; GFX10-NEXT: v_writelane_b32 v40, s46, 12
+; GFX10-NEXT: v_writelane_b32 v40, s47, 13
+; GFX10-NEXT: v_writelane_b32 v40, s48, 14
+; GFX10-NEXT: v_writelane_b32 v40, s49, 15
+; GFX10-NEXT: v_writelane_b32 v40, s50, 16
+; GFX10-NEXT: v_writelane_b32 v40, s51, 17
+; GFX10-NEXT: v_writelane_b32 v40, s52, 18
+; GFX10-NEXT: v_writelane_b32 v40, s53, 19
+; GFX10-NEXT: v_writelane_b32 v40, s54, 20
+; GFX10-NEXT: v_writelane_b32 v40, s55, 21
+; GFX10-NEXT: v_writelane_b32 v40, s56, 22
+; GFX10-NEXT: v_writelane_b32 v40, s57, 23
+; GFX10-NEXT: v_writelane_b32 v40, s58, 24
+; GFX10-NEXT: v_writelane_b32 v40, s59, 25
+; GFX10-NEXT: v_writelane_b32 v40, s60, 26
+; GFX10-NEXT: v_writelane_b32 v40, s61, 27
+; GFX10-NEXT: v_writelane_b32 v40, s62, 28
+; GFX10-NEXT: v_writelane_b32 v40, s63, 29
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT: v_readlane_b32 s63, v40, 29
+; GFX10-NEXT: v_readlane_b32 s62, v40, 28
+; GFX10-NEXT: v_readlane_b32 s61, v40, 27
+; GFX10-NEXT: v_readlane_b32 s60, v40, 26
+; GFX10-NEXT: v_readlane_b32 s59, v40, 25
+; GFX10-NEXT: v_readlane_b32 s58, v40, 24
+; GFX10-NEXT: v_readlane_b32 s57, v40, 23
+; GFX10-NEXT: v_readlane_b32 s56, v40, 22
+; GFX10-NEXT: v_readlane_b32 s55, v40, 21
+; GFX10-NEXT: v_readlane_b32 s54, v40, 20
+; GFX10-NEXT: v_readlane_b32 s53, v40, 19
+; GFX10-NEXT: v_readlane_b32 s52, v40, 18
+; GFX10-NEXT: v_readlane_b32 s51, v40, 17
+; GFX10-NEXT: v_readlane_b32 s50, v40, 16
+; GFX10-NEXT: v_readlane_b32 s49, v40, 15
+; GFX10-NEXT: v_readlane_b32 s48, v40, 14
+; GFX10-NEXT: v_readlane_b32 s47, v40, 13
+; GFX10-NEXT: v_readlane_b32 s46, v40, 12
+; GFX10-NEXT: v_readlane_b32 s45, v40, 11
+; GFX10-NEXT: v_readlane_b32 s44, v40, 10
+; GFX10-NEXT: v_readlane_b32 s43, v40, 9
+; GFX10-NEXT: v_readlane_b32 s42, v40, 8
+; GFX10-NEXT: v_readlane_b32 s41, v40, 7
+; GFX10-NEXT: v_readlane_b32 s40, v40, 6
+; GFX10-NEXT: v_readlane_b32 s39, v40, 5
+; GFX10-NEXT: v_readlane_b32 s38, v40, 4
+; GFX10-NEXT: v_readlane_b32 s37, v40, 3
+; GFX10-NEXT: v_readlane_b32 s36, v40, 2
+; GFX10-NEXT: v_readlane_b32 s35, v40, 1
+; GFX10-NEXT: v_readlane_b32 s34, v40, 0
+; GFX10-NEXT: s_addk_i32 s32, 0xfc00
+; GFX10-NEXT: v_readlane_b32 s33, v40, 30
+; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[4:5]
;
; GFX10-SCRATCH-LABEL: tail_call_byval_align16:
; GFX10-SCRATCH: ; %bb.0: ; %entry
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s32 offset:8
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
+; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:16 ; 4-byte Folded Spill
+; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 30
+; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
+; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32
+; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 offset:8
+; GFX10-SCRATCH-NEXT: s_mov_b64 s[4:5], s[30:31]
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 6
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 7
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 8
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 9
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 10
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 11
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 12
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 13
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 14
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 15
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 17
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 18
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 19
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 20
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 21
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 22
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 23
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 24
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 25
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 26
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 27
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 28
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 29
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 29
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 28
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 27
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 26
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 25
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 24
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 23
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 22
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 21
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 20
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 19
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 18
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 17
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 16
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 15
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 14
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 13
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 12
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 11
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 10
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 9
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 8
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 0
+; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 30
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
+; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:16 ; 4-byte Folded Reload
+; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
+; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[4:5]
entry:
%alloca = alloca double, align 8, addrspace(5)
tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval(double) align 16 %alloca)
; GFX9-LABEL: test_call_external_void_func_i1_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_mov_b32_e32 v0, 1
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_inreg@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_inreg@rel32@hi+12
; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i1_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 1
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1_inreg@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg true)
ret void
}
; GFX9-LABEL: test_call_external_void_func_i8_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 3
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: s_movk_i32 s4, 0x7b
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i8_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_i8_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: v_writelane_b32 v40, s31, 2
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 3
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i8_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_movk_i32 s4, 0x7b
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i8_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i8_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_movk_i32 s4, 0x7b
+; GFX10-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-NEXT: v_writelane_b32 v40, s31, 2
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg 123)
ret void
}
; GFX9-LABEL: test_call_external_void_func_i16_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 3
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: s_movk_i32 s4, 0x7b
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: v_writelane_b32 v40, s31, 2
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 3
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i16_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_movk_i32 s4, 0x7b
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i16_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_movk_i32 s4, 0x7b
+; GFX10-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-NEXT: v_writelane_b32 v40, s31, 2
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg 123)
ret void
}
; GFX9-LABEL: test_call_external_void_func_i32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 3
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: s_mov_b32 s4, 42
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: v_writelane_b32 v40, s31, 2
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 3
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_mov_b32 s4, 42
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_mov_b32 s4, 42
+; GFX10-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-NEXT: v_writelane_b32 v40, s31, 2
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg 42)
ret void
}
; GFX9-LABEL: test_call_external_void_func_i64_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: s_movk_i32 s4, 0x7b
; GFX9-NEXT: s_mov_b32 s5, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_i64_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_movk_i32 s4, 0x7b
-; GFX10-NEXT: s_mov_b32 s5, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i64_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_movk_i32 s4, 0x7b
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_mov_b32 s5, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i64_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg 123)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v2i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_mov_b64 s[4:5], 0
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 6
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s30, 4
+; GFX9-NEXT: v_writelane_b32 v40, s31, 5
+; GFX9-NEXT: s_mov_b64 s[30:31], 0
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[8:9]
-; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 6
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2i64_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_mov_b64 s[4:5], 0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 6
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[8:9]
-; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-NEXT: v_writelane_b32 v40, s31, 5
+; GFX10-NEXT: s_mov_b64 s[30:31], 0
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6
; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x i64>, <2 x i64> addrspace(4)* null
call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v2i64_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 6
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 4
; GFX9-NEXT: s_mov_b32 s4, 1
; GFX9-NEXT: s_mov_b32 s5, 2
; GFX9-NEXT: s_mov_b32 s6, 3
; GFX9-NEXT: s_mov_b32 s7, 4
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[8:9]
-; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 5
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 6
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2i64_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 6
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_mov_b32 s4, 1
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_mov_b32 s5, 2
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
; GFX10-NEXT: s_mov_b32 s6, 3
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
; GFX10-NEXT: s_mov_b32 s7, 4
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[8:9]
-; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-NEXT: v_writelane_b32 v40, s31, 5
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
-; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg <i64 8589934593, i64 17179869187>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v3i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_mov_b64 s[4:5], 0
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 8
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
+; GFX9-NEXT: v_writelane_b32 v40, s9, 5
+; GFX9-NEXT: v_writelane_b32 v40, s30, 6
+; GFX9-NEXT: v_writelane_b32 v40, s31, 7
+; GFX9-NEXT: s_mov_b64 s[30:31], 0
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s8, 1
; GFX9-NEXT: s_mov_b32 s9, 2
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[10:11]
-; GFX9-NEXT: s_add_u32 s10, s10, external_void_func_v3i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s11, s11, external_void_func_v3i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 6
+; GFX9-NEXT: v_readlane_b32 s31, v40, 7
+; GFX9-NEXT: v_readlane_b32 s9, v40, 5
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 8
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3i64_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_mov_b64 s[4:5], 0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX10-NEXT: s_mov_b32 s8, 1
-; GFX10-NEXT: s_mov_b32 s9, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 8
; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[10:11]
-; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v3i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v3i64_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-NEXT: s_mov_b32 s8, 1
+; GFX10-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-NEXT: s_mov_b32 s9, 2
+; GFX10-NEXT: v_writelane_b32 v40, s30, 6
+; GFX10-NEXT: v_writelane_b32 v40, s31, 7
+; GFX10-NEXT: s_mov_b64 s[30:31], 0
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 6
+; GFX10-NEXT: v_readlane_b32 s31, v40, 7
+; GFX10-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 8
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 8
; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
-; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i64_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 8
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%load = load <2 x i64>, <2 x i64> addrspace(4)* null
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
; GFX9-LABEL: test_call_external_void_func_v4i64_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_mov_b64 s[4:5], 0
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 10
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
+; GFX9-NEXT: v_writelane_b32 v40, s9, 5
+; GFX9-NEXT: v_writelane_b32 v40, s10, 6
+; GFX9-NEXT: v_writelane_b32 v40, s11, 7
+; GFX9-NEXT: v_writelane_b32 v40, s30, 8
+; GFX9-NEXT: v_writelane_b32 v40, s31, 9
+; GFX9-NEXT: s_mov_b64 s[30:31], 0
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: s_mov_b32 s8, 1
; GFX9-NEXT: s_mov_b32 s9, 2
; GFX9-NEXT: s_mov_b32 s10, 3
; GFX9-NEXT: s_mov_b32 s11, 4
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[12:13]
-; GFX9-NEXT: s_add_u32 s12, s12, external_void_func_v4i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s13, s13, external_void_func_v4i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[12:13]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 8
+; GFX9-NEXT: v_readlane_b32 s31, v40, 9
+; GFX9-NEXT: v_readlane_b32 s11, v40, 7
+; GFX9-NEXT: v_readlane_b32 s10, v40, 6
+; GFX9-NEXT: v_readlane_b32 s9, v40, 5
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 10
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v4i64_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_mov_b64 s[4:5], 0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 10
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
; GFX10-NEXT: s_mov_b32 s8, 1
+; GFX10-NEXT: v_writelane_b32 v40, s9, 5
; GFX10-NEXT: s_mov_b32 s9, 2
+; GFX10-NEXT: v_writelane_b32 v40, s10, 6
; GFX10-NEXT: s_mov_b32 s10, 3
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: v_writelane_b32 v40, s11, 7
; GFX10-NEXT: s_mov_b32 s11, 4
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[12:13]
-; GFX10-NEXT: s_add_u32 s12, s12, external_void_func_v4i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s13, s13, external_void_func_v4i64_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[12:13]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 8
+; GFX10-NEXT: v_writelane_b32 v40, s31, 9
+; GFX10-NEXT: s_mov_b64 s[30:31], 0
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 8
+; GFX10-NEXT: v_readlane_b32 s31, v40, 9
+; GFX10-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 10
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 10
; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
-; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 3
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i64_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
+; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
+; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 10
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%load = load <2 x i64>, <2 x i64> addrspace(4)* null
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg %val)
; GFX9-LABEL: test_call_external_void_func_f16_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 3
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: s_movk_i32 s4, 0x4400
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: v_writelane_b32 v40, s31, 2
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 3
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_f16_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_movk_i32 s4, 0x4400
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_f16_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_movk_i32 s4, 0x4400
+; GFX10-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-NEXT: v_writelane_b32 v40, s31, 2
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f16_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f16_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_f16_inreg(half inreg 4.0)
ret void
}
; GFX9-LABEL: test_call_external_void_func_f32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 3
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: s_mov_b32 s4, 4.0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: v_writelane_b32 v40, s31, 2
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 3
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_f32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_mov_b32 s4, 4.0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_f32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_mov_b32 s4, 4.0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-NEXT: v_writelane_b32 v40, s31, 2
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_f32_inreg(float inreg 4.0)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v2f32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: s_mov_b32 s4, 1.0
; GFX9-NEXT: s_mov_b32 s5, 2.0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2f32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_mov_b32 s4, 1.0
-; GFX10-NEXT: s_mov_b32 s5, 2.0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2f32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_mov_b32 s4, 1.0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_mov_b32 s5, 2.0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg <float 1.0, float 2.0>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v3f32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 5
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 3
; GFX9-NEXT: s_mov_b32 s4, 1.0
; GFX9-NEXT: s_mov_b32 s5, 2.0
; GFX9-NEXT: s_mov_b32 s6, 4.0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[8:9]
-; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v3f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v3f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 4
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 3
+; GFX9-NEXT: v_readlane_b32 s31, v40, 4
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 5
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3f32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 5
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_mov_b32 s4, 1.0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_mov_b32 s5, 2.0
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
; GFX10-NEXT: s_mov_b32 s6, 4.0
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[8:9]
-; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v3f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v3f32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 3
+; GFX10-NEXT: v_writelane_b32 v40, s31, 4
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 3
+; GFX10-NEXT: v_readlane_b32 s31, v40, 4
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 5
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 5
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 5
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg <float 1.0, float 2.0, float 4.0>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v5f32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 7
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 5
; GFX9-NEXT: s_mov_b32 s4, 1.0
; GFX9-NEXT: s_mov_b32 s5, 2.0
; GFX9-NEXT: s_mov_b32 s6, 4.0
; GFX9-NEXT: s_mov_b32 s7, -1.0
; GFX9-NEXT: s_mov_b32 s8, 0.5
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[10:11]
-; GFX9-NEXT: s_add_u32 s10, s10, external_void_func_v5f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s11, s11, external_void_func_v5f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 6
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 5
+; GFX9-NEXT: v_readlane_b32 s31, v40, 6
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 7
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v5f32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 7
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_mov_b32 s4, 1.0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_mov_b32 s5, 2.0
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
; GFX10-NEXT: s_mov_b32 s6, 4.0
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
; GFX10-NEXT: s_mov_b32 s7, -1.0
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
; GFX10-NEXT: s_mov_b32 s8, 0.5
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[10:11]
-; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v5f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v5f32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 5
+; GFX10-NEXT: v_writelane_b32 v40, s31, 6
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 5
+; GFX10-NEXT: v_readlane_b32 s31, v40, 6
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 7
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s7, -1.0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0.5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 7
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5f32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_mov_b32 s7, -1.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0.5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 7
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_f64_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: s_mov_b32 s4, 0
; GFX9-NEXT: s_mov_b32 s5, 0x40100000
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_f64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_f64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_f64_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_mov_b32 s4, 0
-; GFX10-NEXT: s_mov_b32 s5, 0x40100000
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_f64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_f64_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_mov_b32 s5, 0x40100000
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f64_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_f64_inreg(double inreg 4.0)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v2f64_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 6
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 4
; GFX9-NEXT: s_mov_b32 s4, 0
; GFX9-NEXT: s_mov_b32 s5, 2.0
; GFX9-NEXT: s_mov_b32 s6, 0
; GFX9-NEXT: s_mov_b32 s7, 0x40100000
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[8:9]
-; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v2f64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v2f64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 5
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 6
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2f64_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 6
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_mov_b32 s5, 2.0
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
; GFX10-NEXT: s_mov_b32 s6, 0
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
; GFX10-NEXT: s_mov_b32 s7, 0x40100000
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[8:9]
-; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v2f64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v2f64_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-NEXT: v_writelane_b32 v40, s31, 5
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f64_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg <double 2.0, double 4.0>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v3f64_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 8
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
+; GFX9-NEXT: v_writelane_b32 v40, s9, 5
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 6
; GFX9-NEXT: s_mov_b32 s4, 0
; GFX9-NEXT: s_mov_b32 s5, 2.0
; GFX9-NEXT: s_mov_b32 s6, 0
; GFX9-NEXT: s_mov_b32 s7, 0x40100000
; GFX9-NEXT: s_mov_b32 s8, 0
; GFX9-NEXT: s_mov_b32 s9, 0x40200000
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[10:11]
-; GFX9-NEXT: s_add_u32 s10, s10, external_void_func_v3f64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s11, s11, external_void_func_v3f64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 7
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 6
+; GFX9-NEXT: v_readlane_b32 s31, v40, 7
+; GFX9-NEXT: v_readlane_b32 s9, v40, 5
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 8
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3f64_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 8
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_mov_b32 s5, 2.0
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
; GFX10-NEXT: s_mov_b32 s6, 0
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
; GFX10-NEXT: s_mov_b32 s7, 0x40100000
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
; GFX10-NEXT: s_mov_b32 s8, 0
+; GFX10-NEXT: v_writelane_b32 v40, s9, 5
; GFX10-NEXT: s_mov_b32 s9, 0x40200000
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[10:11]
-; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v3f64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v3f64_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 6
+; GFX10-NEXT: v_writelane_b32 v40, s31, 7
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 6
+; GFX10-NEXT: v_readlane_b32 s31, v40, 7
+; GFX10-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 8
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 8
+; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
+; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4
+; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 0x40200000
-; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
-; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
-; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4
-; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 8
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg <double 2.0, double 4.0, double 8.0>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v2i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 3
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
+; GFX9-NEXT: s_load_dword s4, s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: v_writelane_b32 v40, s31, 2
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 3
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2i16_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2i16_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-NEXT: s_load_dword s4, s[30:31], 0x0
+; GFX10-NEXT: v_writelane_b32 v40, s31, 2
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x i16>, <2 x i16> addrspace(4)* undef
call amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v3i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3i16_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <3 x i16>, <3 x i16> addrspace(4)* undef
call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <3 x half>, <3 x half> addrspace(4)* undef
call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v3i16_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: s_mov_b32 s4, 0x20001
; GFX9-NEXT: s_mov_b32 s5, 3
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3i16_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_mov_b32 s4, 0x20001
-; GFX10-NEXT: s_mov_b32 s5, 3
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_mov_b32 s4, 0x20001
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_mov_b32 s5, 3
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg <i16 1, i16 2, i16 3>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v3f16_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: s_mov_b32 s4, 0x40003c00
; GFX9-NEXT: s_movk_i32 s5, 0x4400
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3f16_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_mov_b32 s4, 0x40003c00
-; GFX10-NEXT: s_movk_i32 s5, 0x4400
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_mov_b32 s4, 0x40003c00
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_movk_i32 s5, 0x4400
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00
-; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg <half 1.0, half 2.0, half 4.0>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v4i16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v4i16_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <4 x i16>, <4 x i16> addrspace(4)* undef
call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v4i16_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: s_mov_b32 s4, 0x20001
; GFX9-NEXT: s_mov_b32 s5, 0x40003
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v4i16_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_mov_b32 s4, 0x20001
-; GFX10-NEXT: s_mov_b32 s5, 0x40003
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_mov_b32 s4, 0x20001
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_mov_b32 s5, 0x40003
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg <i16 1, i16 2, i16 3, i16 4>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v2f16_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 3
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
+; GFX9-NEXT: s_load_dword s4, s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: v_writelane_b32 v40, s31, 2
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 3
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2f16_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2f16_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-NEXT: s_load_dword s4, s[30:31], 0x0
+; GFX10-NEXT: v_writelane_b32 v40, s31, 2
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f16_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x half>, <2 x half> addrspace(4)* undef
call amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v2i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
+; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <2 x i32>, <2 x i32> addrspace(4)* undef
call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v2i32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: s_mov_b32 s4, 1
; GFX9-NEXT: s_mov_b32 s5, 2
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v2i32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_mov_b32 s4, 1
-; GFX10-NEXT: s_mov_b32 s5, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_mov_b32 s4, 1
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_mov_b32 s5, 2
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg <i32 1, i32 2>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v3i32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 5
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 3
; GFX9-NEXT: s_mov_b32 s4, 3
; GFX9-NEXT: s_mov_b32 s5, 4
; GFX9-NEXT: s_mov_b32 s6, 5
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[8:9]
-; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 4
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 3
+; GFX9-NEXT: v_readlane_b32 s31, v40, 4
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 5
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3i32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 5
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_mov_b32 s4, 3
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_mov_b32 s5, 4
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
; GFX10-NEXT: s_mov_b32 s6, 5
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[8:9]
-; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 3
+; GFX10-NEXT: v_writelane_b32 v40, s31, 4
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 3
+; GFX10-NEXT: v_readlane_b32 s31, v40, 4
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 5
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4
-; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 5
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 5
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v3i32_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 6
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 4
; GFX9-NEXT: s_mov_b32 s4, 3
; GFX9-NEXT: s_mov_b32 s5, 4
; GFX9-NEXT: s_mov_b32 s6, 5
; GFX9-NEXT: s_mov_b32 s7, 6
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[8:9]
-; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 5
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 6
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v3i32_i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 6
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_mov_b32 s4, 3
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_mov_b32 s5, 4
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
; GFX10-NEXT: s_mov_b32 s6, 5
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
; GFX10-NEXT: s_mov_b32 s7, 6
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[8:9]
-; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_i32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-NEXT: v_writelane_b32 v40, s31, 5
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4
-; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5
-; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 6
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 6
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg <i32 3, i32 4, i32 5>, i32 inreg 6)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v4i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 6
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s30, 4
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[8:9]
-; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 5
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 6
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v4i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 6
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[8:9]
-; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
+; GFX10-NEXT: v_writelane_b32 v40, s31, 5
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%val = load <4 x i32>, <4 x i32> addrspace(4)* undef
call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg %val)
ret void
; GFX9-LABEL: test_call_external_void_func_v4i32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 6
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 4
; GFX9-NEXT: s_mov_b32 s4, 1
; GFX9-NEXT: s_mov_b32 s5, 2
; GFX9-NEXT: s_mov_b32 s6, 3
; GFX9-NEXT: s_mov_b32 s7, 4
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[8:9]
-; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 5
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 6
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v4i32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 6
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_mov_b32 s4, 1
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_mov_b32 s5, 2
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
; GFX10-NEXT: s_mov_b32 s6, 3
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
; GFX10-NEXT: s_mov_b32 s7, 4
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[8:9]
-; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-NEXT: v_writelane_b32 v40, s31, 5
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
-; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg <i32 1, i32 2, i32 3, i32 4>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v5i32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 7
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 5
; GFX9-NEXT: s_mov_b32 s4, 1
; GFX9-NEXT: s_mov_b32 s5, 2
; GFX9-NEXT: s_mov_b32 s6, 3
; GFX9-NEXT: s_mov_b32 s7, 4
; GFX9-NEXT: s_mov_b32 s8, 5
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[10:11]
-; GFX9-NEXT: s_add_u32 s10, s10, external_void_func_v5i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s11, s11, external_void_func_v5i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 6
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 5
+; GFX9-NEXT: v_readlane_b32 s31, v40, 6
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 7
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v5i32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 7
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_mov_b32 s4, 1
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_mov_b32 s5, 2
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
; GFX10-NEXT: s_mov_b32 s6, 3
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
; GFX10-NEXT: s_mov_b32 s7, 4
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
; GFX10-NEXT: s_mov_b32 s8, 5
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[10:11]
-; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v5i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v5i32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 5
+; GFX10-NEXT: v_writelane_b32 v40, s31, 6
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 5
+; GFX10-NEXT: v_readlane_b32 s31, v40, 6
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 7
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
-; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 7
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 7
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v8i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 10
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
+; GFX9-NEXT: v_writelane_b32 v40, s9, 5
+; GFX9-NEXT: v_writelane_b32 v40, s10, 6
+; GFX9-NEXT: v_writelane_b32 v40, s11, 7
+; GFX9-NEXT: v_writelane_b32 v40, s30, 8
+; GFX9-NEXT: v_writelane_b32 v40, s31, 9
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[12:13]
-; GFX9-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[12:13]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: s_load_dwordx8 s[4:11], s[30:31], 0x0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 8
+; GFX9-NEXT: v_readlane_b32 s31, v40, 9
+; GFX9-NEXT: v_readlane_b32 s11, v40, 7
+; GFX9-NEXT: v_readlane_b32 s10, v40, 6
+; GFX9-NEXT: v_readlane_b32 s9, v40, 5
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 10
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v8i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 10
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[12:13]
-; GFX10-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-NEXT: v_writelane_b32 v40, s10, 6
+; GFX10-NEXT: v_writelane_b32 v40, s11, 7
+; GFX10-NEXT: v_writelane_b32 v40, s30, 8
+; GFX10-NEXT: v_writelane_b32 v40, s31, 9
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[12:13]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: s_load_dwordx8 s[4:11], s[30:31], 0x0
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 8
+; GFX10-NEXT: v_readlane_b32 s31, v40, 9
+; GFX10-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 10
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 10
; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-SCRATCH-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 10
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%ptr = load <8 x i32> addrspace(4)*, <8 x i32> addrspace(4)* addrspace(4)* undef
%val = load <8 x i32>, <8 x i32> addrspace(4)* %ptr
call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg %val)
; GFX9-LABEL: test_call_external_void_func_v8i32_imm_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 10
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
+; GFX9-NEXT: v_writelane_b32 v40, s9, 5
+; GFX9-NEXT: v_writelane_b32 v40, s10, 6
+; GFX9-NEXT: v_writelane_b32 v40, s11, 7
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 8
; GFX9-NEXT: s_mov_b32 s4, 1
; GFX9-NEXT: s_mov_b32 s5, 2
; GFX9-NEXT: s_mov_b32 s6, 3
; GFX9-NEXT: s_mov_b32 s9, 6
; GFX9-NEXT: s_mov_b32 s10, 7
; GFX9-NEXT: s_mov_b32 s11, 8
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[12:13]
-; GFX9-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[12:13]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 9
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 8
+; GFX9-NEXT: v_readlane_b32 s31, v40, 9
+; GFX9-NEXT: v_readlane_b32 s11, v40, 7
+; GFX9-NEXT: v_readlane_b32 s10, v40, 6
+; GFX9-NEXT: v_readlane_b32 s9, v40, 5
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 10
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v8i32_imm_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 10
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_mov_b32 s4, 1
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_mov_b32 s5, 2
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
; GFX10-NEXT: s_mov_b32 s6, 3
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
; GFX10-NEXT: s_mov_b32 s7, 4
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
; GFX10-NEXT: s_mov_b32 s8, 5
+; GFX10-NEXT: v_writelane_b32 v40, s9, 5
; GFX10-NEXT: s_mov_b32 s9, 6
+; GFX10-NEXT: v_writelane_b32 v40, s10, 6
; GFX10-NEXT: s_mov_b32 s10, 7
+; GFX10-NEXT: v_writelane_b32 v40, s11, 7
; GFX10-NEXT: s_mov_b32 s11, 8
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[12:13]
-; GFX10-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[12:13]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 8
+; GFX10-NEXT: v_writelane_b32 v40, s31, 9
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 8
+; GFX10-NEXT: v_readlane_b32 s31, v40, 9
+; GFX10-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 10
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 10
+; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
+; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
+; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 6
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 7
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 8
-; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
-; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
-; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4
-; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 10
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
ret void
}
; GFX9-LABEL: test_call_external_void_func_v16i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 18
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
+; GFX9-NEXT: v_writelane_b32 v40, s9, 5
+; GFX9-NEXT: v_writelane_b32 v40, s10, 6
+; GFX9-NEXT: v_writelane_b32 v40, s11, 7
+; GFX9-NEXT: v_writelane_b32 v40, s12, 8
+; GFX9-NEXT: v_writelane_b32 v40, s13, 9
+; GFX9-NEXT: v_writelane_b32 v40, s14, 10
+; GFX9-NEXT: v_writelane_b32 v40, s15, 11
+; GFX9-NEXT: v_writelane_b32 v40, s16, 12
+; GFX9-NEXT: v_writelane_b32 v40, s17, 13
+; GFX9-NEXT: v_writelane_b32 v40, s18, 14
+; GFX9-NEXT: v_writelane_b32 v40, s19, 15
+; GFX9-NEXT: v_writelane_b32 v40, s30, 16
+; GFX9-NEXT: v_writelane_b32 v40, s31, 17
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx16 s[4:19], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[20:21]
-; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v16i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v16i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 16
+; GFX9-NEXT: v_readlane_b32 s31, v40, 17
+; GFX9-NEXT: v_readlane_b32 s19, v40, 15
+; GFX9-NEXT: v_readlane_b32 s18, v40, 14
+; GFX9-NEXT: v_readlane_b32 s17, v40, 13
+; GFX9-NEXT: v_readlane_b32 s16, v40, 12
+; GFX9-NEXT: v_readlane_b32 s15, v40, 11
+; GFX9-NEXT: v_readlane_b32 s14, v40, 10
+; GFX9-NEXT: v_readlane_b32 s13, v40, 9
+; GFX9-NEXT: v_readlane_b32 s12, v40, 8
+; GFX9-NEXT: v_readlane_b32 s11, v40, 7
+; GFX9-NEXT: v_readlane_b32 s10, v40, 6
+; GFX9-NEXT: v_readlane_b32 s9, v40, 5
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 18
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v16i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 18
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[20:21]
-; GFX10-NEXT: s_add_u32 s20, s20, external_void_func_v16i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s21, s21, external_void_func_v16i32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-NEXT: v_writelane_b32 v40, s10, 6
+; GFX10-NEXT: v_writelane_b32 v40, s11, 7
+; GFX10-NEXT: v_writelane_b32 v40, s12, 8
+; GFX10-NEXT: v_writelane_b32 v40, s13, 9
+; GFX10-NEXT: v_writelane_b32 v40, s14, 10
+; GFX10-NEXT: v_writelane_b32 v40, s15, 11
+; GFX10-NEXT: v_writelane_b32 v40, s16, 12
+; GFX10-NEXT: v_writelane_b32 v40, s17, 13
+; GFX10-NEXT: v_writelane_b32 v40, s18, 14
+; GFX10-NEXT: v_writelane_b32 v40, s19, 15
+; GFX10-NEXT: v_writelane_b32 v40, s30, 16
+; GFX10-NEXT: v_writelane_b32 v40, s31, 17
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_load_dwordx16 s[4:19], s[4:5], 0x0
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[20:21]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 16
+; GFX10-NEXT: v_readlane_b32 s31, v40, 17
+; GFX10-NEXT: v_readlane_b32 s19, v40, 15
+; GFX10-NEXT: v_readlane_b32 s18, v40, 14
+; GFX10-NEXT: v_readlane_b32 s17, v40, 13
+; GFX10-NEXT: v_readlane_b32 s16, v40, 12
+; GFX10-NEXT: v_readlane_b32 s15, v40, 11
+; GFX10-NEXT: v_readlane_b32 s14, v40, 10
+; GFX10-NEXT: v_readlane_b32 s13, v40, 9
+; GFX10-NEXT: v_readlane_b32 s12, v40, 8
+; GFX10-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 18
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 18
; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15
; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_inreg@rel32@hi+12
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 16
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 18
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%ptr = load <16 x i32> addrspace(4)*, <16 x i32> addrspace(4)* addrspace(4)* undef
%val = load <16 x i32>, <16 x i32> addrspace(4)* %ptr
call amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg %val)
; GFX9-LABEL: test_call_external_void_func_v32i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 18
-; GFX9-NEXT: v_writelane_b32 v40, s36, 0
-; GFX9-NEXT: v_writelane_b32 v40, s37, 1
-; GFX9-NEXT: v_writelane_b32 v40, s38, 2
-; GFX9-NEXT: v_writelane_b32 v40, s39, 3
-; GFX9-NEXT: v_writelane_b32 v40, s40, 4
-; GFX9-NEXT: v_writelane_b32 v40, s41, 5
-; GFX9-NEXT: v_writelane_b32 v40, s42, 6
-; GFX9-NEXT: v_writelane_b32 v40, s43, 7
-; GFX9-NEXT: v_writelane_b32 v40, s44, 8
-; GFX9-NEXT: v_writelane_b32 v40, s45, 9
-; GFX9-NEXT: v_writelane_b32 v40, s46, 10
-; GFX9-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s47, 11
-; GFX9-NEXT: v_writelane_b32 v40, s48, 12
-; GFX9-NEXT: v_writelane_b32 v40, s49, 13
-; GFX9-NEXT: v_writelane_b32 v40, s50, 14
-; GFX9-NEXT: v_writelane_b32 v40, s51, 15
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0
-; GFX9-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 28
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
+; GFX9-NEXT: v_writelane_b32 v40, s9, 5
+; GFX9-NEXT: v_writelane_b32 v40, s10, 6
+; GFX9-NEXT: v_writelane_b32 v40, s11, 7
+; GFX9-NEXT: v_writelane_b32 v40, s12, 8
+; GFX9-NEXT: v_writelane_b32 v40, s13, 9
+; GFX9-NEXT: v_writelane_b32 v40, s14, 10
+; GFX9-NEXT: v_writelane_b32 v40, s15, 11
+; GFX9-NEXT: v_writelane_b32 v40, s16, 12
+; GFX9-NEXT: v_writelane_b32 v40, s17, 13
+; GFX9-NEXT: v_writelane_b32 v40, s18, 14
+; GFX9-NEXT: v_writelane_b32 v40, s19, 15
+; GFX9-NEXT: v_writelane_b32 v40, s20, 16
+; GFX9-NEXT: v_writelane_b32 v40, s21, 17
+; GFX9-NEXT: v_writelane_b32 v40, s22, 18
+; GFX9-NEXT: v_writelane_b32 v40, s23, 19
+; GFX9-NEXT: v_writelane_b32 v40, s24, 20
+; GFX9-NEXT: v_writelane_b32 v40, s25, 21
+; GFX9-NEXT: v_writelane_b32 v40, s26, 22
+; GFX9-NEXT: v_writelane_b32 v40, s27, 23
+; GFX9-NEXT: v_writelane_b32 v40, s28, 24
+; GFX9-NEXT: v_writelane_b32 v40, s29, 25
+; GFX9-NEXT: v_writelane_b32 v40, s30, 26
+; GFX9-NEXT: v_writelane_b32 v40, s31, 27
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 16
-; GFX9-NEXT: v_writelane_b32 v40, s31, 17
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s46
; GFX9-NEXT: v_mov_b32_e32 v1, s47
; GFX9-NEXT: s_mov_b32 s28, s44
; GFX9-NEXT: s_mov_b32 s29, s45
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 16
-; GFX9-NEXT: v_readlane_b32 s5, v40, 17
-; GFX9-NEXT: v_readlane_b32 s51, v40, 15
-; GFX9-NEXT: v_readlane_b32 s50, v40, 14
-; GFX9-NEXT: v_readlane_b32 s49, v40, 13
-; GFX9-NEXT: v_readlane_b32 s48, v40, 12
-; GFX9-NEXT: v_readlane_b32 s47, v40, 11
-; GFX9-NEXT: v_readlane_b32 s46, v40, 10
-; GFX9-NEXT: v_readlane_b32 s45, v40, 9
-; GFX9-NEXT: v_readlane_b32 s44, v40, 8
-; GFX9-NEXT: v_readlane_b32 s43, v40, 7
-; GFX9-NEXT: v_readlane_b32 s42, v40, 6
-; GFX9-NEXT: v_readlane_b32 s41, v40, 5
-; GFX9-NEXT: v_readlane_b32 s40, v40, 4
-; GFX9-NEXT: v_readlane_b32 s39, v40, 3
-; GFX9-NEXT: v_readlane_b32 s38, v40, 2
-; GFX9-NEXT: v_readlane_b32 s37, v40, 1
-; GFX9-NEXT: v_readlane_b32 s36, v40, 0
+; GFX9-NEXT: v_readlane_b32 s30, v40, 26
+; GFX9-NEXT: v_readlane_b32 s31, v40, 27
+; GFX9-NEXT: v_readlane_b32 s29, v40, 25
+; GFX9-NEXT: v_readlane_b32 s28, v40, 24
+; GFX9-NEXT: v_readlane_b32 s27, v40, 23
+; GFX9-NEXT: v_readlane_b32 s26, v40, 22
+; GFX9-NEXT: v_readlane_b32 s25, v40, 21
+; GFX9-NEXT: v_readlane_b32 s24, v40, 20
+; GFX9-NEXT: v_readlane_b32 s23, v40, 19
+; GFX9-NEXT: v_readlane_b32 s22, v40, 18
+; GFX9-NEXT: v_readlane_b32 s21, v40, 17
+; GFX9-NEXT: v_readlane_b32 s20, v40, 16
+; GFX9-NEXT: v_readlane_b32 s19, v40, 15
+; GFX9-NEXT: v_readlane_b32 s18, v40, 14
+; GFX9-NEXT: v_readlane_b32 s17, v40, 13
+; GFX9-NEXT: v_readlane_b32 s16, v40, 12
+; GFX9-NEXT: v_readlane_b32 s15, v40, 11
+; GFX9-NEXT: v_readlane_b32 s14, v40, 10
+; GFX9-NEXT: v_readlane_b32 s13, v40, 9
+; GFX9-NEXT: v_readlane_b32 s12, v40, 8
+; GFX9-NEXT: v_readlane_b32 s11, v40, 7
+; GFX9-NEXT: v_readlane_b32 s10, v40, 6
+; GFX9-NEXT: v_readlane_b32 s9, v40, 5
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
+; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 18
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 28
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v32i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 18
-; GFX10-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 28
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s36, 0
-; GFX10-NEXT: v_writelane_b32 v40, s37, 1
-; GFX10-NEXT: v_writelane_b32 v40, s38, 2
-; GFX10-NEXT: v_writelane_b32 v40, s39, 3
-; GFX10-NEXT: v_writelane_b32 v40, s40, 4
-; GFX10-NEXT: v_writelane_b32 v40, s41, 5
-; GFX10-NEXT: v_writelane_b32 v40, s42, 6
-; GFX10-NEXT: v_writelane_b32 v40, s43, 7
-; GFX10-NEXT: v_writelane_b32 v40, s44, 8
-; GFX10-NEXT: v_writelane_b32 v40, s45, 9
-; GFX10-NEXT: v_writelane_b32 v40, s46, 10
-; GFX10-NEXT: v_writelane_b32 v40, s47, 11
-; GFX10-NEXT: v_writelane_b32 v40, s48, 12
-; GFX10-NEXT: v_writelane_b32 v40, s49, 13
-; GFX10-NEXT: v_writelane_b32 v40, s50, 14
-; GFX10-NEXT: v_writelane_b32 v40, s51, 15
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-NEXT: v_writelane_b32 v40, s10, 6
+; GFX10-NEXT: v_writelane_b32 v40, s11, 7
+; GFX10-NEXT: v_writelane_b32 v40, s12, 8
+; GFX10-NEXT: v_writelane_b32 v40, s13, 9
+; GFX10-NEXT: v_writelane_b32 v40, s14, 10
+; GFX10-NEXT: v_writelane_b32 v40, s15, 11
+; GFX10-NEXT: v_writelane_b32 v40, s16, 12
+; GFX10-NEXT: v_writelane_b32 v40, s17, 13
+; GFX10-NEXT: v_writelane_b32 v40, s18, 14
+; GFX10-NEXT: v_writelane_b32 v40, s19, 15
+; GFX10-NEXT: v_writelane_b32 v40, s20, 16
+; GFX10-NEXT: v_writelane_b32 v40, s21, 17
+; GFX10-NEXT: v_writelane_b32 v40, s22, 18
+; GFX10-NEXT: v_writelane_b32 v40, s23, 19
+; GFX10-NEXT: v_writelane_b32 v40, s24, 20
+; GFX10-NEXT: v_writelane_b32 v40, s25, 21
+; GFX10-NEXT: v_writelane_b32 v40, s26, 22
+; GFX10-NEXT: v_writelane_b32 v40, s27, 23
+; GFX10-NEXT: v_writelane_b32 v40, s28, 24
+; GFX10-NEXT: v_writelane_b32 v40, s29, 25
+; GFX10-NEXT: v_writelane_b32 v40, s30, 26
+; GFX10-NEXT: v_writelane_b32 v40, s31, 27
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40
-; GFX10-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0
-; GFX10-NEXT: v_writelane_b32 v40, s30, 16
-; GFX10-NEXT: v_writelane_b32 v40, s31, 17
+; GFX10-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40
+; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
; GFX10-NEXT: s_getpc_b64 s[30:31]
; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12
; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16
; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20
; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 16
-; GFX10-NEXT: v_readlane_b32 s5, v40, 17
-; GFX10-NEXT: v_readlane_b32 s51, v40, 15
-; GFX10-NEXT: v_readlane_b32 s50, v40, 14
-; GFX10-NEXT: v_readlane_b32 s49, v40, 13
-; GFX10-NEXT: v_readlane_b32 s48, v40, 12
-; GFX10-NEXT: v_readlane_b32 s47, v40, 11
-; GFX10-NEXT: v_readlane_b32 s46, v40, 10
-; GFX10-NEXT: v_readlane_b32 s45, v40, 9
-; GFX10-NEXT: v_readlane_b32 s44, v40, 8
-; GFX10-NEXT: v_readlane_b32 s43, v40, 7
-; GFX10-NEXT: v_readlane_b32 s42, v40, 6
-; GFX10-NEXT: v_readlane_b32 s41, v40, 5
-; GFX10-NEXT: v_readlane_b32 s40, v40, 4
-; GFX10-NEXT: v_readlane_b32 s39, v40, 3
-; GFX10-NEXT: v_readlane_b32 s38, v40, 2
-; GFX10-NEXT: v_readlane_b32 s37, v40, 1
-; GFX10-NEXT: v_readlane_b32 s36, v40, 0
+; GFX10-NEXT: v_readlane_b32 s30, v40, 26
+; GFX10-NEXT: v_readlane_b32 s31, v40, 27
+; GFX10-NEXT: v_readlane_b32 s29, v40, 25
+; GFX10-NEXT: v_readlane_b32 s28, v40, 24
+; GFX10-NEXT: v_readlane_b32 s27, v40, 23
+; GFX10-NEXT: v_readlane_b32 s26, v40, 22
+; GFX10-NEXT: v_readlane_b32 s25, v40, 21
+; GFX10-NEXT: v_readlane_b32 s24, v40, 20
+; GFX10-NEXT: v_readlane_b32 s23, v40, 19
+; GFX10-NEXT: v_readlane_b32 s22, v40, 18
+; GFX10-NEXT: v_readlane_b32 s21, v40, 17
+; GFX10-NEXT: v_readlane_b32 s20, v40, 16
+; GFX10-NEXT: v_readlane_b32 s19, v40, 15
+; GFX10-NEXT: v_readlane_b32 s18, v40, 14
+; GFX10-NEXT: v_readlane_b32 s17, v40, 13
+; GFX10-NEXT: v_readlane_b32 s16, v40, 12
+; GFX10-NEXT: v_readlane_b32 s15, v40, 11
+; GFX10-NEXT: v_readlane_b32 s14, v40, 10
+; GFX10-NEXT: v_readlane_b32 s13, v40, 9
+; GFX10-NEXT: v_readlane_b32 s12, v40, 8
+; GFX10-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 18
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 28
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 18
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 28
; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 1
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 2
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 3
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 4
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 5
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 6
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 7
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 8
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 9
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 10
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 11
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 13
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 14
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 15
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15
; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-SCRATCH-NEXT: s_clause 0x1
; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18
; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49
; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36
; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37
; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21
; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39
; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40
; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41
+; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16
+; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22
; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23
; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24
; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25
; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45
-; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16
-; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 17
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 15
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 14
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 13
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 12
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 11
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 10
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 9
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 8
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 7
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 6
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 5
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 4
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 3
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 2
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 1
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s26, v40, 22
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s25, v40, 21
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s24, v40, 20
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s23, v40, 19
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s22, v40, 18
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s21, v40, 17
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s20, v40, 16
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 18
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 28
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%ptr = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef
%val = load <32 x i32>, <32 x i32> addrspace(4)* %ptr
call amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg %val)
; GFX9-LABEL: test_call_external_void_func_v32i32_i32_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 18
-; GFX9-NEXT: v_writelane_b32 v40, s36, 0
-; GFX9-NEXT: v_writelane_b32 v40, s37, 1
-; GFX9-NEXT: v_writelane_b32 v40, s38, 2
-; GFX9-NEXT: v_writelane_b32 v40, s39, 3
-; GFX9-NEXT: v_writelane_b32 v40, s40, 4
-; GFX9-NEXT: v_writelane_b32 v40, s41, 5
-; GFX9-NEXT: v_writelane_b32 v40, s42, 6
-; GFX9-NEXT: v_writelane_b32 v40, s43, 7
-; GFX9-NEXT: v_writelane_b32 v40, s44, 8
-; GFX9-NEXT: v_writelane_b32 v40, s45, 9
-; GFX9-NEXT: v_writelane_b32 v40, s46, 10
-; GFX9-NEXT: v_writelane_b32 v40, s47, 11
-; GFX9-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0
-; GFX9-NEXT: s_load_dword s22, s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s48, 12
-; GFX9-NEXT: v_writelane_b32 v40, s49, 13
-; GFX9-NEXT: v_writelane_b32 v40, s50, 14
-; GFX9-NEXT: v_writelane_b32 v40, s51, 15
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0
-; GFX9-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 28
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
+; GFX9-NEXT: v_writelane_b32 v40, s9, 5
+; GFX9-NEXT: v_writelane_b32 v40, s10, 6
+; GFX9-NEXT: v_writelane_b32 v40, s11, 7
+; GFX9-NEXT: v_writelane_b32 v40, s12, 8
+; GFX9-NEXT: v_writelane_b32 v40, s13, 9
+; GFX9-NEXT: v_writelane_b32 v40, s14, 10
+; GFX9-NEXT: v_writelane_b32 v40, s15, 11
+; GFX9-NEXT: v_writelane_b32 v40, s16, 12
+; GFX9-NEXT: v_writelane_b32 v40, s17, 13
+; GFX9-NEXT: v_writelane_b32 v40, s18, 14
+; GFX9-NEXT: v_writelane_b32 v40, s19, 15
+; GFX9-NEXT: v_writelane_b32 v40, s20, 16
+; GFX9-NEXT: v_writelane_b32 v40, s21, 17
+; GFX9-NEXT: v_writelane_b32 v40, s22, 18
+; GFX9-NEXT: v_writelane_b32 v40, s23, 19
+; GFX9-NEXT: v_writelane_b32 v40, s24, 20
+; GFX9-NEXT: v_writelane_b32 v40, s25, 21
+; GFX9-NEXT: v_writelane_b32 v40, s26, 22
+; GFX9-NEXT: v_writelane_b32 v40, s27, 23
+; GFX9-NEXT: v_writelane_b32 v40, s28, 24
+; GFX9-NEXT: v_writelane_b32 v40, s29, 25
+; GFX9-NEXT: v_writelane_b32 v40, s30, 26
+; GFX9-NEXT: v_writelane_b32 v40, s31, 27
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_mov_b32_e32 v0, s22
-; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_load_dword s34, s[30:31], 0x0
+; GFX9-NEXT: ; kill: killed $sgpr30_sgpr31
+; GFX9-NEXT: ; kill: killed $sgpr30_sgpr31
+; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
+; GFX9-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v0, s34
+; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12
; GFX9-NEXT: v_mov_b32_e32 v0, s46
; GFX9-NEXT: v_mov_b32_e32 v1, s47
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32
; GFX9-NEXT: v_mov_b32_e32 v0, s49
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
; GFX9-NEXT: v_mov_b32_e32 v0, s50
-; GFX9-NEXT: v_writelane_b32 v40, s30, 16
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
; GFX9-NEXT: v_mov_b32_e32 v0, s51
; GFX9-NEXT: s_mov_b32 s20, s36
; GFX9-NEXT: s_mov_b32 s27, s43
; GFX9-NEXT: s_mov_b32 s28, s44
; GFX9-NEXT: s_mov_b32 s29, s45
-; GFX9-NEXT: v_writelane_b32 v40, s31, 17
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20
-; GFX9-NEXT: s_getpc_b64 s[30:31]
-; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 16
-; GFX9-NEXT: v_readlane_b32 s5, v40, 17
-; GFX9-NEXT: v_readlane_b32 s51, v40, 15
-; GFX9-NEXT: v_readlane_b32 s50, v40, 14
-; GFX9-NEXT: v_readlane_b32 s49, v40, 13
-; GFX9-NEXT: v_readlane_b32 s48, v40, 12
-; GFX9-NEXT: v_readlane_b32 s47, v40, 11
-; GFX9-NEXT: v_readlane_b32 s46, v40, 10
-; GFX9-NEXT: v_readlane_b32 s45, v40, 9
-; GFX9-NEXT: v_readlane_b32 s44, v40, 8
-; GFX9-NEXT: v_readlane_b32 s43, v40, 7
-; GFX9-NEXT: v_readlane_b32 s42, v40, 6
-; GFX9-NEXT: v_readlane_b32 s41, v40, 5
-; GFX9-NEXT: v_readlane_b32 s40, v40, 4
-; GFX9-NEXT: v_readlane_b32 s39, v40, 3
-; GFX9-NEXT: v_readlane_b32 s38, v40, 2
-; GFX9-NEXT: v_readlane_b32 s37, v40, 1
-; GFX9-NEXT: v_readlane_b32 s36, v40, 0
+; GFX9-NEXT: v_readlane_b32 s30, v40, 26
+; GFX9-NEXT: v_readlane_b32 s31, v40, 27
+; GFX9-NEXT: v_readlane_b32 s29, v40, 25
+; GFX9-NEXT: v_readlane_b32 s28, v40, 24
+; GFX9-NEXT: v_readlane_b32 s27, v40, 23
+; GFX9-NEXT: v_readlane_b32 s26, v40, 22
+; GFX9-NEXT: v_readlane_b32 s25, v40, 21
+; GFX9-NEXT: v_readlane_b32 s24, v40, 20
+; GFX9-NEXT: v_readlane_b32 s23, v40, 19
+; GFX9-NEXT: v_readlane_b32 s22, v40, 18
+; GFX9-NEXT: v_readlane_b32 s21, v40, 17
+; GFX9-NEXT: v_readlane_b32 s20, v40, 16
+; GFX9-NEXT: v_readlane_b32 s19, v40, 15
+; GFX9-NEXT: v_readlane_b32 s18, v40, 14
+; GFX9-NEXT: v_readlane_b32 s17, v40, 13
+; GFX9-NEXT: v_readlane_b32 s16, v40, 12
+; GFX9-NEXT: v_readlane_b32 s15, v40, 11
+; GFX9-NEXT: v_readlane_b32 s14, v40, 10
+; GFX9-NEXT: v_readlane_b32 s13, v40, 9
+; GFX9-NEXT: v_readlane_b32 s12, v40, 8
+; GFX9-NEXT: v_readlane_b32 s11, v40, 7
+; GFX9-NEXT: v_readlane_b32 s10, v40, 6
+; GFX9-NEXT: v_readlane_b32 s9, v40, 5
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
+; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 18
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 28
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_v32i32_i32_inreg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 18
-; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0
-; GFX10-NEXT: s_load_dword s22, s[4:5], 0x0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 28
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s36, 0
-; GFX10-NEXT: v_writelane_b32 v40, s37, 1
-; GFX10-NEXT: v_writelane_b32 v40, s38, 2
-; GFX10-NEXT: v_writelane_b32 v40, s39, 3
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-NEXT: v_writelane_b32 v40, s10, 6
+; GFX10-NEXT: v_writelane_b32 v40, s11, 7
+; GFX10-NEXT: v_writelane_b32 v40, s12, 8
+; GFX10-NEXT: v_writelane_b32 v40, s13, 9
+; GFX10-NEXT: v_writelane_b32 v40, s14, 10
+; GFX10-NEXT: v_writelane_b32 v40, s15, 11
+; GFX10-NEXT: v_writelane_b32 v40, s16, 12
+; GFX10-NEXT: v_writelane_b32 v40, s17, 13
+; GFX10-NEXT: v_writelane_b32 v40, s18, 14
+; GFX10-NEXT: v_writelane_b32 v40, s19, 15
+; GFX10-NEXT: v_writelane_b32 v40, s20, 16
+; GFX10-NEXT: v_writelane_b32 v40, s21, 17
+; GFX10-NEXT: v_writelane_b32 v40, s22, 18
+; GFX10-NEXT: v_writelane_b32 v40, s23, 19
+; GFX10-NEXT: v_writelane_b32 v40, s24, 20
+; GFX10-NEXT: v_writelane_b32 v40, s25, 21
+; GFX10-NEXT: v_writelane_b32 v40, s26, 22
+; GFX10-NEXT: v_writelane_b32 v40, s27, 23
+; GFX10-NEXT: v_writelane_b32 v40, s28, 24
+; GFX10-NEXT: v_writelane_b32 v40, s29, 25
+; GFX10-NEXT: v_writelane_b32 v40, s30, 26
+; GFX10-NEXT: v_writelane_b32 v40, s31, 27
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s22
-; GFX10-NEXT: v_writelane_b32 v40, s40, 4
-; GFX10-NEXT: v_writelane_b32 v40, s41, 5
-; GFX10-NEXT: v_writelane_b32 v40, s42, 6
-; GFX10-NEXT: v_writelane_b32 v40, s43, 7
-; GFX10-NEXT: v_writelane_b32 v40, s44, 8
-; GFX10-NEXT: v_writelane_b32 v40, s45, 9
-; GFX10-NEXT: v_writelane_b32 v40, s46, 10
-; GFX10-NEXT: v_writelane_b32 v40, s47, 11
-; GFX10-NEXT: v_writelane_b32 v40, s48, 12
-; GFX10-NEXT: v_writelane_b32 v40, s49, 13
-; GFX10-NEXT: v_writelane_b32 v40, s50, 14
-; GFX10-NEXT: v_writelane_b32 v40, s51, 15
-; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40
-; GFX10-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
-; GFX10-NEXT: v_writelane_b32 v40, s30, 16
-; GFX10-NEXT: v_writelane_b32 v40, s31, 17
+; GFX10-NEXT: s_clause 0x2
+; GFX10-NEXT: s_load_dword s34, s[30:31], 0x0
+; GFX10-NEXT: ; meta instruction
+; GFX10-NEXT: ; meta instruction
+; GFX10-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40
+; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
; GFX10-NEXT: s_getpc_b64 s[30:31]
; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, s46
+; GFX10-NEXT: v_mov_b32_e32 v0, s34
; GFX10-NEXT: v_mov_b32_e32 v1, s47
; GFX10-NEXT: v_mov_b32_e32 v2, s48
+; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24
+; GFX10-NEXT: v_mov_b32_e32 v0, s46
; GFX10-NEXT: v_mov_b32_e32 v3, s49
; GFX10-NEXT: s_mov_b32 s20, s36
; GFX10-NEXT: s_mov_b32 s21, s37
; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16
; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20
; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 16
-; GFX10-NEXT: v_readlane_b32 s5, v40, 17
-; GFX10-NEXT: v_readlane_b32 s51, v40, 15
-; GFX10-NEXT: v_readlane_b32 s50, v40, 14
-; GFX10-NEXT: v_readlane_b32 s49, v40, 13
-; GFX10-NEXT: v_readlane_b32 s48, v40, 12
-; GFX10-NEXT: v_readlane_b32 s47, v40, 11
-; GFX10-NEXT: v_readlane_b32 s46, v40, 10
-; GFX10-NEXT: v_readlane_b32 s45, v40, 9
-; GFX10-NEXT: v_readlane_b32 s44, v40, 8
-; GFX10-NEXT: v_readlane_b32 s43, v40, 7
-; GFX10-NEXT: v_readlane_b32 s42, v40, 6
-; GFX10-NEXT: v_readlane_b32 s41, v40, 5
-; GFX10-NEXT: v_readlane_b32 s40, v40, 4
-; GFX10-NEXT: v_readlane_b32 s39, v40, 3
-; GFX10-NEXT: v_readlane_b32 s38, v40, 2
-; GFX10-NEXT: v_readlane_b32 s37, v40, 1
-; GFX10-NEXT: v_readlane_b32 s36, v40, 0
+; GFX10-NEXT: v_readlane_b32 s30, v40, 26
+; GFX10-NEXT: v_readlane_b32 s31, v40, 27
+; GFX10-NEXT: v_readlane_b32 s29, v40, 25
+; GFX10-NEXT: v_readlane_b32 s28, v40, 24
+; GFX10-NEXT: v_readlane_b32 s27, v40, 23
+; GFX10-NEXT: v_readlane_b32 s26, v40, 22
+; GFX10-NEXT: v_readlane_b32 s25, v40, 21
+; GFX10-NEXT: v_readlane_b32 s24, v40, 20
+; GFX10-NEXT: v_readlane_b32 s23, v40, 19
+; GFX10-NEXT: v_readlane_b32 s22, v40, 18
+; GFX10-NEXT: v_readlane_b32 s21, v40, 17
+; GFX10-NEXT: v_readlane_b32 s20, v40, 16
+; GFX10-NEXT: v_readlane_b32 s19, v40, 15
+; GFX10-NEXT: v_readlane_b32 s18, v40, 14
+; GFX10-NEXT: v_readlane_b32 s17, v40, 13
+; GFX10-NEXT: v_readlane_b32 s16, v40, 12
+; GFX10-NEXT: v_readlane_b32 s15, v40, 11
+; GFX10-NEXT: v_readlane_b32 s14, v40, 10
+; GFX10-NEXT: v_readlane_b32 s13, v40, 9
+; GFX10-NEXT: v_readlane_b32 s12, v40, 8
+; GFX10-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 18
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 28
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32_inreg:
; GFX10-SCRATCH: ; %bb.0:
; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 18
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 28
; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 1
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 2
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 3
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 4
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 5
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 6
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 7
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 8
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 9
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 10
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 11
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 13
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 14
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 15
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15
; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-SCRATCH-NEXT: s_clause 0x2
; GFX10-SCRATCH-NEXT: s_load_dword s2, s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32_inreg@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18
; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, s2
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20
; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49
; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36
; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37
; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21
; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39
; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40
; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41
+; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s32 offset:24
+; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16
+; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22
; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23
; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24
; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25
; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45
-; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s32 offset:24
-; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16
-; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 17
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 15
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 14
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 13
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 12
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 11
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 10
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 9
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 8
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 7
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 6
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 5
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 4
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 3
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 2
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 1
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s26, v40, 22
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s25, v40, 21
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s24, v40, 20
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s23, v40, 19
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s22, v40, 18
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s21, v40, 17
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s20, v40, 16
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 18
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 28
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%ptr0 = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef
%val0 = load <32 x i32>, <32 x i32> addrspace(4)* %ptr0
%val1 = load i32, i32 addrspace(4)* undef
; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, stack_passed_f64_arg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, stack_passed_f64_arg@rel32@hi+12
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: stack_passed_arg_alignment_v32i32_f64:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, stack_passed_f64_arg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, stack_passed_f64_arg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: stack_passed_arg_alignment_v32i32_f64:
; GFX10-SCRATCH: ; %bb.0: ; %entry
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
entry:
call amdgpu_gfx void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
ret void
; GFX9-LABEL: stack_12xv3i32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v30, 10
; GFX9-NEXT: v_mov_b32_e32 v31, 11
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_12xv3i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_12xv3i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: stack_12xv3i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v40, s33, 2
; GFX10-NEXT: v_mov_b32_e32 v0, 12
; GFX10-NEXT: v_mov_b32_e32 v1, 13
; GFX10-NEXT: v_mov_b32_e32 v30, 10
; GFX10-NEXT: v_mov_b32_e32 v31, 11
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_12xv3i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_12xv3i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: stack_12xv3i32:
; GFX10-SCRATCH: ; %bb.0: ; %entry
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
entry:
call amdgpu_gfx void @external_void_func_12xv3i32(
<3 x i32><i32 0, i32 0, i32 0>,
; GFX9-LABEL: stack_8xv5i32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v30, 6
; GFX9-NEXT: v_mov_b32_e32 v31, 7
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_8xv5i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: stack_8xv5i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_mov_b32_e32 v0, 8
; GFX10-NEXT: v_mov_b32_e32 v1, 9
; GFX10-NEXT: v_mov_b32_e32 v2, 10
; GFX10-NEXT: v_mov_b32_e32 v30, 6
; GFX10-NEXT: v_mov_b32_e32 v31, 7
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_8xv5i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: stack_8xv5i32:
; GFX10-SCRATCH: ; %bb.0: ; %entry
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
entry:
call amdgpu_gfx void @external_void_func_8xv5i32(
<5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>,
; GFX9-LABEL: stack_8xv5f32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v40, s33, 2
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000
; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_8xv5f32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5f32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: stack_8xv5f32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_mov_b32_e32 v0, 0x41000000
; GFX10-NEXT: v_mov_b32_e32 v1, 0x41100000
; GFX10-NEXT: v_mov_b32_e32 v2, 0x41200000
; GFX10-NEXT: v_mov_b32_e32 v30, 0x40c00000
; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_8xv5f32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5f32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SCRATCH-LABEL: stack_8xv5f32:
; GFX10-SCRATCH: ; %bb.0: ; %entry
; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4
; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12
; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
entry:
call amdgpu_gfx void @external_void_func_8xv5f32(
<5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>,