; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
-; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
-; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; FIXME: Should be using s_load_dword
-; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]
+; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]
define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
-; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
-; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
-; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
-; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_sbyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]
; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
-; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
-; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
-; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
-; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
-; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
-; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
+; HSA-VI-DAG: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_sshort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]
; SI-DAG: s_load_dwordx2 s{{\[}}[[ALO:[0-9]+]]:[[AHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; SI-DAG: s_load_dwordx2 s{{\[}}[[BLO:[0-9]+]]:[[BHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}
-; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]]
-; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]]
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
; SI-DAG: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}}
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]]
; SI: v_cndmask_b32_e32
-; SI: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
; SI: v_cndmask_b32_e32
; SI: buffer_store_dwordx2
define void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind {
; SI: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI: s_lshl_b64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG]]:{{[0-9]+\]}}, 2
; SI: s_add_u32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]],
-; SI: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
; SI: s_addc_u32
-; SI: buffer_store_dword v[[LO_VREG]],
-; SI: v_mov_b32_e32
+; SI: v_mov_b32_e32 v[[LO_VREG0:[0-9]+]], s[[LO_SREG2]]
+; SI: v_mov_b32_e32 v[[LO_VREG1:[0-9]+]], s[[LO_SREG2]]
; SI: v_mov_b32_e32
+; SI: buffer_store_dword v[[LO_VREG1]],
+; SI: buffer_store_dwordx2 v{{\[}}[[LO_VREG0]]:
define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
%aa = add i64 %a, 234 ; Prevent shrinking store.
%b = shl i64 %aa, 2