let HasExtSDWA9 = 0;
}
-class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT> {
+class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> {
let NeedPatGen = mode;
}
; CI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, 16, [[LHS]]
; CI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
-; CI: v_ashr_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; CI: v_ashr_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; CI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; GCN-LABEL: {{^}}v_ubfe_sub_i32:
; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
-; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
-; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]
-
-; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
-; VI-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]
+; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
+; GCN-NEXT: v_lshrrev_b32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]
; GCN: [[BFE]]
; GCN: [[SHL]]
; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]]
; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]]
-; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]]
-; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]]
-
-; VI-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
-; VI-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]
+; GCN-NEXT: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], [[SUB]], [[SRC]]
+; GCN-NEXT: v_ashrrev_i32_e32 [[BFE:v[0-9]+]], [[SUB]], [[SHL]]
; GCN: [[BFE]]
; GCN: [[SHL]]
; SI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm
; SI-NEXT: v_and_b32_e32 v0, 7, v0
; SI-NEXT: s_waitcnt vmcnt(0)
-; SI-NEXT: v_lshr_b32_e32 v0, v2, v0
+; SI-NEXT: v_lshrrev_b32_e32 v0, v0, v2
; SI-NEXT: v_and_b32_e32 v0, 1, v0
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; SI-NEXT: v_and_b32_e32 v14, s0, v2
; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; SI-NEXT: v_and_b32_e32 v15, 0xffff, v3
+; SI-NEXT: v_and_b32_e32 v15, s0, v3
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_bcnt_u32_b32_e64 v7, v7, 0
; SI-NEXT: v_bcnt_u32_b32_e64 v6, v6, 0
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
; VI-NEXT: v_lshlrev_b32_e32 v0, 5, v0
-; VI-NEXT: v_mov_b32_e32 v8, 0xffff
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
; VI-NEXT: s_mov_b32 s0, 0xffff
; VI-NEXT: s_waitcnt vmcnt(1)
-; VI-NEXT: v_lshrrev_b32_e32 v9, 16, v3
-; VI-NEXT: v_lshrrev_b32_e32 v10, 16, v2
-; VI-NEXT: v_lshrrev_b32_e32 v11, 16, v1
-; VI-NEXT: v_lshrrev_b32_e32 v12, 16, v0
-; VI-NEXT: v_and_b32_e32 v3, v8, v3
-; VI-NEXT: v_and_b32_e32 v2, v8, v2
-; VI-NEXT: v_and_b32_e32 v1, v8, v1
-; VI-NEXT: v_and_b32_e32 v0, v8, v0
+; VI-NEXT: v_lshrrev_b32_e32 v8, 16, v3
+; VI-NEXT: v_lshrrev_b32_e32 v9, 16, v2
+; VI-NEXT: v_lshrrev_b32_e32 v10, 16, v1
+; VI-NEXT: v_lshrrev_b32_e32 v11, 16, v0
+; VI-NEXT: v_and_b32_e32 v3, s0, v3
+; VI-NEXT: v_and_b32_e32 v2, s0, v2
+; VI-NEXT: v_and_b32_e32 v1, s0, v1
+; VI-NEXT: v_and_b32_e32 v0, s0, v0
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: v_lshrrev_b32_e32 v8, 16, v7
+; VI-NEXT: v_lshrrev_b32_e32 v12, 16, v7
; VI-NEXT: v_lshrrev_b32_e32 v13, 16, v6
; VI-NEXT: v_lshrrev_b32_e32 v14, 16, v5
; VI-NEXT: v_lshrrev_b32_e32 v15, 16, v4
+; VI-NEXT: v_bcnt_u32_b32 v8, v8, 0
; VI-NEXT: v_bcnt_u32_b32 v9, v9, 0
; VI-NEXT: v_bcnt_u32_b32 v10, v10, 0
; VI-NEXT: v_bcnt_u32_b32 v11, v11, 0
-; VI-NEXT: v_bcnt_u32_b32 v12, v12, 0
; VI-NEXT: v_and_b32_e32 v7, s0, v7
; VI-NEXT: v_and_b32_e32 v6, s0, v6
; VI-NEXT: v_and_b32_e32 v5, s0, v5
; VI-NEXT: v_bcnt_u32_b32 v2, v2, 0
; VI-NEXT: v_bcnt_u32_b32 v1, v1, 0
; VI-NEXT: v_bcnt_u32_b32 v0, v0, 0
-; VI-NEXT: v_bcnt_u32_b32 v8, v8, 0
+; VI-NEXT: v_bcnt_u32_b32 v12, v12, 0
; VI-NEXT: v_bcnt_u32_b32 v13, v13, 0
; VI-NEXT: v_bcnt_u32_b32 v14, v14, 0
; VI-NEXT: v_bcnt_u32_b32 v15, v15, 0
+; VI-NEXT: v_lshlrev_b32_e32 v8, 16, v8
; VI-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; VI-NEXT: v_lshlrev_b32_e32 v10, 16, v10
; VI-NEXT: v_lshlrev_b32_e32 v11, 16, v11
-; VI-NEXT: v_lshlrev_b32_e32 v12, 16, v12
; VI-NEXT: v_bcnt_u32_b32 v7, v7, 0
; VI-NEXT: v_bcnt_u32_b32 v6, v6, 0
; VI-NEXT: v_bcnt_u32_b32 v5, v5, 0
; VI-NEXT: v_bcnt_u32_b32 v4, v4, 0
-; VI-NEXT: v_lshlrev_b32_e32 v8, 16, v8
+; VI-NEXT: v_lshlrev_b32_e32 v12, 16, v12
; VI-NEXT: v_lshlrev_b32_e32 v13, 16, v13
; VI-NEXT: v_lshlrev_b32_e32 v14, 16, v14
; VI-NEXT: v_lshlrev_b32_e32 v15, 16, v15
-; VI-NEXT: v_or_b32_e32 v3, v3, v9
-; VI-NEXT: v_or_b32_e32 v2, v2, v10
-; VI-NEXT: v_or_b32_e32 v1, v1, v11
-; VI-NEXT: v_or_b32_e32 v0, v0, v12
-; VI-NEXT: v_or_b32_e32 v7, v7, v8
+; VI-NEXT: v_or_b32_e32 v3, v3, v8
+; VI-NEXT: v_or_b32_e32 v2, v2, v9
+; VI-NEXT: v_or_b32_e32 v1, v1, v10
+; VI-NEXT: v_or_b32_e32 v0, v0, v11
+; VI-NEXT: v_or_b32_e32 v7, v7, v12
; VI-NEXT: v_or_b32_e32 v6, v6, v13
; VI-NEXT: v_or_b32_e32 v5, v5, v14
; VI-NEXT: v_or_b32_e32 v4, v4, v15
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
-; SI-NEXT: v_lshl_b32_e32 v0, v0, v1
-; SI-NEXT: v_lshr_b32_e32 v0, v0, v1
+; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
+; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_d1_indexzext:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s32
-; GFX9-NEXT: v_mov_b32_e32 v3, 15
; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1
-; GFX9-NEXT: v_and_b32_e32 v0, v0, v3
+; GFX9-NEXT: v_mov_b32_e32 v3, 15
+; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
; GFX9-NEXT: scratch_store_dword v2, v3, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_mov_b32_e32 v1, 15
-; GFX10-NEXT: v_mov_b32_e32 v2, s32
-; GFX10-NEXT: v_and_b32_e32 v3, v0, v1
-; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2
-; GFX10-NEXT: v_lshl_add_u32 v2, v3, 2, v2
-; GFX10-NEXT: scratch_store_dword v0, v1, off
+; GFX10-NEXT: v_mov_b32_e32 v1, s32
+; GFX10-NEXT: v_and_b32_e32 v2, 15, v0
+; GFX10-NEXT: v_mov_b32_e32 v3, 15
+; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v1
+; GFX10-NEXT: v_lshl_add_u32 v1, v2, 2, v1
+; GFX10-NEXT: scratch_store_dword v0, v3, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: scratch_load_dword v0, v2, off glc dlc
+; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-PAL: ; %bb.0: ; %bb
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s32
-; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1
-; GFX9-PAL-NEXT: v_and_b32_e32 v0, v0, v3
+; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
+; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0
; GFX9-PAL-NEXT: scratch_store_dword v2, v3, off
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
-; GFX10-PAL-NEXT: v_mov_b32_e32 v2, s32
-; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1
-; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2
-; GFX10-PAL-NEXT: v_lshl_add_u32 v2, v3, 2, v2
-; GFX10-PAL-NEXT: scratch_store_dword v0, v1, off
+; GFX10-PAL-NEXT: v_mov_b32_e32 v1, s32
+; GFX10-PAL-NEXT: v_and_b32_e32 v2, 15, v0
+; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 15
+; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1
+; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v2, 2, v1
+; GFX10-PAL-NEXT: scratch_store_dword v0, v3, off
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-PAL-NEXT: scratch_load_dword v0, v2, off glc dlc
+; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
bb:
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100
; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi
-; GFX9-NEXT: v_mov_b32_e32 v3, 15
; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1
-; GFX9-NEXT: v_and_b32_e32 v0, v0, v3
+; GFX9-NEXT: v_mov_b32_e32 v3, 15
+; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
; GFX9-NEXT: scratch_store_dword v2, v3, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100
-; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
-; GFX10-NEXT: v_and_b32_e32 v3, v0, v1
-; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2
-; GFX10-NEXT: v_lshl_add_u32 v2, v3, 2, v2
-; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc
+; GFX10-NEXT: v_and_b32_e32 v2, 15, v0
+; GFX10-NEXT: v_mov_b32_e32 v1, vcc_lo
+; GFX10-NEXT: v_mov_b32_e32 v3, 15
+; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v1
+; GFX10-NEXT: v_lshl_add_u32 v1, v2, 2, v1
+; GFX10-NEXT: scratch_load_dword v2, off, s32 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: scratch_store_dword v0, v1, off
+; GFX10-NEXT: scratch_store_dword v0, v3, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: scratch_load_dword v0, v2, off glc dlc
+; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x100
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi
-; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1
-; GFX9-PAL-NEXT: v_and_b32_e32 v0, v0, v3
+; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
+; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0
; GFX9-PAL-NEXT: scratch_store_dword v2, v3, off
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100
-; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo
-; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1
-; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2
-; GFX10-PAL-NEXT: v_lshl_add_u32 v2, v3, 2, v2
-; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 glc dlc
+; GFX10-PAL-NEXT: v_and_b32_e32 v2, 15, v0
+; GFX10-PAL-NEXT: v_mov_b32_e32 v1, vcc_lo
+; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 15
+; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1
+; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v2, 2, v1
+; GFX10-PAL-NEXT: scratch_load_dword v2, off, s32 glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-PAL-NEXT: scratch_store_dword v0, v1, off
+; GFX10-PAL-NEXT: scratch_store_dword v0, v3, off
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-PAL-NEXT: scratch_load_dword v0, v2, off glc dlc
+; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
bb:
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004
; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi
-; GFX9-NEXT: v_mov_b32_e32 v3, 15
; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1
-; GFX9-NEXT: v_and_b32_e32 v0, v0, v3
+; GFX9-NEXT: v_mov_b32_e32 v3, 15
+; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
; GFX9-NEXT: scratch_store_dword v2, v3, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004
-; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
-; GFX10-NEXT: v_and_b32_e32 v3, v0, v1
-; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2
-; GFX10-NEXT: v_lshl_add_u32 v2, v3, 2, v2
-; GFX10-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc
+; GFX10-NEXT: v_and_b32_e32 v2, 15, v0
+; GFX10-NEXT: v_mov_b32_e32 v1, vcc_lo
+; GFX10-NEXT: v_mov_b32_e32 v3, 15
+; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v1
+; GFX10-NEXT: v_lshl_add_u32 v1, v2, 2, v1
+; GFX10-NEXT: scratch_load_dword v2, off, s32 offset:4 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: scratch_store_dword v0, v1, off
+; GFX10-NEXT: scratch_store_dword v0, v3, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: scratch_load_dword v0, v2, off glc dlc
+; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4004
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi
-; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1
-; GFX9-PAL-NEXT: v_and_b32_e32 v0, v0, v3
+; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
+; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0
; GFX9-PAL-NEXT: scratch_store_dword v2, v3, off
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1
; GFX10-PAL: ; %bb.0: ; %bb
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004
-; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo
-; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1
-; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2
-; GFX10-PAL-NEXT: v_lshl_add_u32 v2, v3, 2, v2
-; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc
+; GFX10-PAL-NEXT: v_and_b32_e32 v2, 15, v0
+; GFX10-PAL-NEXT: v_mov_b32_e32 v1, vcc_lo
+; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 15
+; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1
+; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v2, 2, v1
+; GFX10-PAL-NEXT: scratch_load_dword v2, off, s32 offset:4 glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-PAL-NEXT: scratch_store_dword v0, v1, off
+; GFX10-PAL-NEXT: scratch_store_dword v0, v3, off
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-PAL-NEXT: scratch_load_dword v0, v2, off glc dlc
+; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
bb:
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
-; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64
+; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64
; GFX7-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
; GFX7-NEXT: s_mov_b32 s2, -1
-; GFX7-NEXT: v_mov_b32_e32 v2, 0xffff
+; GFX7-NEXT: buffer_load_ushort v16, off, s[0:3], 0
; GFX7-NEXT: s_mov_b32 s4, 0xffff
; GFX7-NEXT: s_addc_u32 s13, s13, 0
+; GFX7-NEXT: s_waitcnt vmcnt(2)
+; GFX7-NEXT: v_bfe_i32 v1, v2, 0, 4
+; GFX7-NEXT: v_bfe_i32 v3, v2, 4, 4
; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_bfe_i32 v1, v3, 0, 4
-; GFX7-NEXT: v_bfe_i32 v4, v3, 4, 4
-; GFX7-NEXT: v_bfe_i32 v5, v3, 8, 4
-; GFX7-NEXT: v_bfe_i32 v6, v3, 12, 4
-; GFX7-NEXT: v_bfe_i32 v7, v3, 16, 4
-; GFX7-NEXT: v_bfe_i32 v8, v3, 20, 4
-; GFX7-NEXT: v_bfe_i32 v9, v3, 24, 4
-; GFX7-NEXT: v_ashrrev_i32_e32 v3, 28, v3
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_bfe_i32 v10, v0, 0, 4
-; GFX7-NEXT: v_bfe_i32 v11, v0, 4, 4
-; GFX7-NEXT: v_bfe_i32 v12, v0, 8, 4
-; GFX7-NEXT: v_bfe_i32 v13, v0, 12, 4
-; GFX7-NEXT: v_bfe_i32 v14, v0, 16, 4
-; GFX7-NEXT: v_bfe_i32 v15, v0, 20, 4
-; GFX7-NEXT: v_bfe_i32 v16, v0, 24, 4
-; GFX7-NEXT: v_ashrrev_i32_e32 v0, 28, v0
-; GFX7-NEXT: v_and_b32_e32 v9, v2, v9
-; GFX7-NEXT: v_and_b32_e32 v3, v2, v3
-; GFX7-NEXT: v_and_b32_e32 v15, v2, v15
-; GFX7-NEXT: v_and_b32_e32 v16, v2, v16
-; GFX7-NEXT: v_and_b32_e32 v0, v2, v0
-; GFX7-NEXT: buffer_load_ushort v2, off, s[0:3], 0
+; GFX7-NEXT: v_bfe_i32 v9, v0, 0, 4
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
+; GFX7-NEXT: v_bfe_i32 v10, v0, 4, 4
+; GFX7-NEXT: v_and_b32_e32 v9, s4, v9
+; GFX7-NEXT: v_bfe_i32 v4, v2, 8, 4
+; GFX7-NEXT: v_and_b32_e32 v3, s4, v3
+; GFX7-NEXT: v_bfe_i32 v11, v0, 8, 4
; GFX7-NEXT: v_and_b32_e32 v10, s4, v10
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_mad_u32_u24 v1, v1, v9, v16
+; GFX7-NEXT: v_bfe_i32 v5, v2, 12, 4
; GFX7-NEXT: v_and_b32_e32 v4, s4, v4
+; GFX7-NEXT: v_bfe_i32 v12, v0, 12, 4
; GFX7-NEXT: v_and_b32_e32 v11, s4, v11
+; GFX7-NEXT: v_mad_u32_u24 v1, v3, v10, v1
+; GFX7-NEXT: v_bfe_i32 v6, v2, 16, 4
; GFX7-NEXT: v_and_b32_e32 v5, s4, v5
+; GFX7-NEXT: v_bfe_i32 v13, v0, 16, 4
; GFX7-NEXT: v_and_b32_e32 v12, s4, v12
+; GFX7-NEXT: v_mad_u32_u24 v1, v4, v11, v1
+; GFX7-NEXT: v_bfe_i32 v7, v2, 20, 4
; GFX7-NEXT: v_and_b32_e32 v6, s4, v6
+; GFX7-NEXT: v_bfe_i32 v14, v0, 20, 4
; GFX7-NEXT: v_and_b32_e32 v13, s4, v13
+; GFX7-NEXT: v_mad_u32_u24 v1, v5, v12, v1
+; GFX7-NEXT: v_bfe_i32 v8, v2, 24, 4
; GFX7-NEXT: v_and_b32_e32 v7, s4, v7
+; GFX7-NEXT: v_bfe_i32 v15, v0, 24, 4
; GFX7-NEXT: v_and_b32_e32 v14, s4, v14
-; GFX7-NEXT: v_and_b32_e32 v8, s4, v8
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mad_u32_u24 v1, v1, v10, v2
-; GFX7-NEXT: v_mad_u32_u24 v1, v4, v11, v1
-; GFX7-NEXT: v_mad_u32_u24 v1, v5, v12, v1
; GFX7-NEXT: v_mad_u32_u24 v1, v6, v13, v1
+; GFX7-NEXT: v_ashrrev_i32_e32 v2, 28, v2
+; GFX7-NEXT: v_and_b32_e32 v8, s4, v8
+; GFX7-NEXT: v_ashrrev_i32_e32 v0, 28, v0
+; GFX7-NEXT: v_and_b32_e32 v15, s4, v15
; GFX7-NEXT: v_mad_u32_u24 v1, v7, v14, v1
+; GFX7-NEXT: v_and_b32_e32 v2, s4, v2
+; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_mad_u32_u24 v1, v8, v15, v1
-; GFX7-NEXT: v_mad_u32_u24 v1, v9, v16, v1
-; GFX7-NEXT: v_mad_u32_u24 v0, v3, v0, v1
+; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1
; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
-; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64
+; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64
; GFX7-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
; GFX7-NEXT: s_mov_b32 s2, -1
-; GFX7-NEXT: v_mov_b32_e32 v2, 0xff
+; GFX7-NEXT: buffer_load_ubyte v16, off, s[0:3], 0
; GFX7-NEXT: s_movk_i32 s4, 0xff
; GFX7-NEXT: s_addc_u32 s13, s13, 0
+; GFX7-NEXT: s_waitcnt vmcnt(2)
+; GFX7-NEXT: v_bfe_i32 v1, v2, 0, 4
+; GFX7-NEXT: v_bfe_i32 v3, v2, 4, 4
; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_bfe_i32 v1, v3, 0, 4
-; GFX7-NEXT: v_bfe_i32 v4, v3, 4, 4
-; GFX7-NEXT: v_bfe_i32 v5, v3, 8, 4
-; GFX7-NEXT: v_bfe_i32 v6, v3, 12, 4
-; GFX7-NEXT: v_bfe_i32 v7, v3, 16, 4
-; GFX7-NEXT: v_bfe_i32 v8, v3, 20, 4
-; GFX7-NEXT: v_bfe_i32 v9, v3, 24, 4
-; GFX7-NEXT: v_ashrrev_i32_e32 v3, 28, v3
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_bfe_i32 v10, v0, 0, 4
-; GFX7-NEXT: v_bfe_i32 v11, v0, 4, 4
-; GFX7-NEXT: v_bfe_i32 v12, v0, 8, 4
-; GFX7-NEXT: v_bfe_i32 v13, v0, 12, 4
-; GFX7-NEXT: v_bfe_i32 v14, v0, 16, 4
-; GFX7-NEXT: v_bfe_i32 v15, v0, 20, 4
-; GFX7-NEXT: v_bfe_i32 v16, v0, 24, 4
-; GFX7-NEXT: v_ashrrev_i32_e32 v0, 28, v0
-; GFX7-NEXT: v_and_b32_e32 v9, v2, v9
-; GFX7-NEXT: v_and_b32_e32 v3, v2, v3
-; GFX7-NEXT: v_and_b32_e32 v15, v2, v15
-; GFX7-NEXT: v_and_b32_e32 v16, v2, v16
-; GFX7-NEXT: v_and_b32_e32 v0, v2, v0
-; GFX7-NEXT: buffer_load_ubyte v2, off, s[0:3], 0
+; GFX7-NEXT: v_bfe_i32 v9, v0, 0, 4
; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
+; GFX7-NEXT: v_bfe_i32 v10, v0, 4, 4
+; GFX7-NEXT: v_and_b32_e32 v9, s4, v9
+; GFX7-NEXT: v_bfe_i32 v4, v2, 8, 4
+; GFX7-NEXT: v_and_b32_e32 v3, s4, v3
+; GFX7-NEXT: v_bfe_i32 v11, v0, 8, 4
; GFX7-NEXT: v_and_b32_e32 v10, s4, v10
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: v_mad_u32_u24 v1, v1, v9, v16
+; GFX7-NEXT: v_bfe_i32 v5, v2, 12, 4
; GFX7-NEXT: v_and_b32_e32 v4, s4, v4
+; GFX7-NEXT: v_bfe_i32 v12, v0, 12, 4
; GFX7-NEXT: v_and_b32_e32 v11, s4, v11
+; GFX7-NEXT: v_mad_u32_u24 v1, v3, v10, v1
+; GFX7-NEXT: v_bfe_i32 v6, v2, 16, 4
; GFX7-NEXT: v_and_b32_e32 v5, s4, v5
+; GFX7-NEXT: v_bfe_i32 v13, v0, 16, 4
; GFX7-NEXT: v_and_b32_e32 v12, s4, v12
+; GFX7-NEXT: v_mad_u32_u24 v1, v4, v11, v1
+; GFX7-NEXT: v_bfe_i32 v7, v2, 20, 4
; GFX7-NEXT: v_and_b32_e32 v6, s4, v6
+; GFX7-NEXT: v_bfe_i32 v14, v0, 20, 4
; GFX7-NEXT: v_and_b32_e32 v13, s4, v13
+; GFX7-NEXT: v_mad_u32_u24 v1, v5, v12, v1
+; GFX7-NEXT: v_bfe_i32 v8, v2, 24, 4
; GFX7-NEXT: v_and_b32_e32 v7, s4, v7
+; GFX7-NEXT: v_bfe_i32 v15, v0, 24, 4
; GFX7-NEXT: v_and_b32_e32 v14, s4, v14
-; GFX7-NEXT: v_and_b32_e32 v8, s4, v8
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mad_u32_u24 v1, v1, v10, v2
-; GFX7-NEXT: v_mad_u32_u24 v1, v4, v11, v1
-; GFX7-NEXT: v_mad_u32_u24 v1, v5, v12, v1
; GFX7-NEXT: v_mad_u32_u24 v1, v6, v13, v1
+; GFX7-NEXT: v_ashrrev_i32_e32 v2, 28, v2
+; GFX7-NEXT: v_and_b32_e32 v8, s4, v8
+; GFX7-NEXT: v_ashrrev_i32_e32 v0, 28, v0
+; GFX7-NEXT: v_and_b32_e32 v15, s4, v15
; GFX7-NEXT: v_mad_u32_u24 v1, v7, v14, v1
+; GFX7-NEXT: v_and_b32_e32 v2, s4, v2
+; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
; GFX7-NEXT: v_mad_u32_u24 v1, v8, v15, v1
-; GFX7-NEXT: v_mad_u32_u24 v1, v9, v16, v1
-; GFX7-NEXT: v_mad_u32_u24 v0, v3, v0, v1
+; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1
; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
-; GFX7-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64
+; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64
; GFX7-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
-; GFX7-NEXT: s_mov_b32 s4, 0xffff
-; GFX7-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: buffer_load_ushort v16, off, s[0:3], 0
+; GFX7-NEXT: s_mov_b32 s4, 0xffff
; GFX7-NEXT: s_addc_u32 s13, s13, 0
-; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_bfe_i32 v1, v3, 20, 4
-; GFX7-NEXT: v_bfe_i32 v4, v3, 16, 4
-; GFX7-NEXT: v_bfe_i32 v5, v3, 4, 4
-; GFX7-NEXT: v_bfe_i32 v6, v3, 0, 4
+; GFX7-NEXT: s_waitcnt vmcnt(2)
+; GFX7-NEXT: v_bfe_i32 v1, v2, 20, 4
+; GFX7-NEXT: v_bfe_i32 v3, v2, 16, 4
+; GFX7-NEXT: v_bfe_i32 v4, v2, 4, 4
+; GFX7-NEXT: v_bfe_i32 v5, v2, 0, 4
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX7-NEXT: v_and_b32_e32 v4, s4, v4
-; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
-; GFX7-NEXT: v_and_b32_e32 v6, s4, v6
+; GFX7-NEXT: v_and_b32_e32 v3, s4, v3
+; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4
+; GFX7-NEXT: v_and_b32_e32 v5, s4, v5
+; GFX7-NEXT: s_waitcnt vmcnt(1)
+; GFX7-NEXT: v_bfe_i32 v9, v0, 20, 4
+; GFX7-NEXT: v_bfe_i32 v10, v0, 16, 4
+; GFX7-NEXT: v_bfe_i32 v11, v0, 4, 4
+; GFX7-NEXT: v_bfe_i32 v12, v0, 0, 4
+; GFX7-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX7-NEXT: v_or_b32_e32 v3, v5, v4
+; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v9
+; GFX7-NEXT: v_and_b32_e32 v5, s4, v10
+; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v11
+; GFX7-NEXT: v_and_b32_e32 v10, s4, v12
+; GFX7-NEXT: v_bfe_i32 v13, v0, 24, 4
+; GFX7-NEXT: v_ashrrev_i32_e32 v15, 28, v0
+; GFX7-NEXT: v_or_b32_e32 v4, v5, v4
+; GFX7-NEXT: v_or_b32_e32 v5, v10, v9
+; GFX7-NEXT: v_and_b32_e32 v11, s4, v13
+; GFX7-NEXT: v_and_b32_e32 v13, s4, v15
+; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v3
+; GFX7-NEXT: v_and_b32_e32 v3, s4, v3
+; GFX7-NEXT: v_lshrrev_b32_e32 v10, 16, v5
+; GFX7-NEXT: v_and_b32_e32 v5, s4, v5
+; GFX7-NEXT: v_bfe_i32 v7, v2, 8, 4
+; GFX7-NEXT: v_bfe_i32 v14, v0, 8, 4
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_bfe_i32 v10, v0, 20, 4
-; GFX7-NEXT: v_bfe_i32 v11, v0, 16, 4
-; GFX7-NEXT: v_bfe_i32 v12, v0, 4, 4
-; GFX7-NEXT: v_bfe_i32 v13, v0, 0, 4
-; GFX7-NEXT: v_or_b32_e32 v1, v4, v1
-; GFX7-NEXT: v_or_b32_e32 v4, v6, v5
-; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v10
-; GFX7-NEXT: v_and_b32_e32 v6, s4, v11
-; GFX7-NEXT: v_lshlrev_b32_e32 v10, 16, v12
-; GFX7-NEXT: v_and_b32_e32 v11, v2, v13
-; GFX7-NEXT: v_bfe_i32 v7, v3, 24, 4
-; GFX7-NEXT: v_bfe_i32 v8, v3, 8, 4
-; GFX7-NEXT: v_ashrrev_i32_e32 v9, 28, v3
-; GFX7-NEXT: v_bfe_i32 v3, v3, 12, 4
-; GFX7-NEXT: v_bfe_i32 v14, v0, 24, 4
-; GFX7-NEXT: v_bfe_i32 v15, v0, 8, 4
-; GFX7-NEXT: v_ashrrev_i32_e32 v16, 28, v0
+; GFX7-NEXT: v_mad_u32_u24 v3, v3, v5, v16
+; GFX7-NEXT: v_bfe_i32 v6, v2, 24, 4
+; GFX7-NEXT: v_ashrrev_i32_e32 v8, 28, v2
+; GFX7-NEXT: v_bfe_i32 v2, v2, 12, 4
+; GFX7-NEXT: v_and_b32_e32 v7, s4, v7
; GFX7-NEXT: v_bfe_i32 v0, v0, 12, 4
-; GFX7-NEXT: v_or_b32_e32 v5, v6, v5
-; GFX7-NEXT: v_or_b32_e32 v6, v11, v10
-; GFX7-NEXT: v_and_b32_e32 v3, v2, v3
-; GFX7-NEXT: v_and_b32_e32 v9, v2, v9
-; GFX7-NEXT: v_and_b32_e32 v12, v2, v14
-; GFX7-NEXT: v_and_b32_e32 v13, v2, v15
-; GFX7-NEXT: v_and_b32_e32 v0, v2, v0
-; GFX7-NEXT: v_and_b32_e32 v14, v2, v16
-; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v1
-; GFX7-NEXT: v_lshrrev_b32_e32 v16, 16, v4
-; GFX7-NEXT: v_and_b32_e32 v4, v2, v4
-; GFX7-NEXT: v_and_b32_e32 v1, v2, v1
-; GFX7-NEXT: v_lshrrev_b32_e32 v10, 16, v5
-; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v6
-; GFX7-NEXT: v_and_b32_e32 v6, v2, v6
-; GFX7-NEXT: v_and_b32_e32 v2, v2, v5
-; GFX7-NEXT: buffer_load_ushort v5, off, s[0:3], 0
+; GFX7-NEXT: v_and_b32_e32 v12, s4, v14
+; GFX7-NEXT: v_mad_u32_u24 v3, v15, v10, v3
+; GFX7-NEXT: v_and_b32_e32 v2, s4, v2
+; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX7-NEXT: v_mad_u32_u24 v3, v7, v12, v3
+; GFX7-NEXT: v_lshrrev_b32_e32 v14, 16, v1
+; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
+; GFX7-NEXT: v_lshrrev_b32_e32 v9, 16, v4
+; GFX7-NEXT: v_and_b32_e32 v4, s4, v4
+; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v3
+; GFX7-NEXT: v_mad_u32_u24 v0, v1, v4, v0
+; GFX7-NEXT: v_and_b32_e32 v6, s4, v6
+; GFX7-NEXT: v_mad_u32_u24 v0, v14, v9, v0
; GFX7-NEXT: v_and_b32_e32 v8, s4, v8
-; GFX7-NEXT: v_and_b32_e32 v7, s4, v7
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mad_u32_u24 v4, v4, v6, v5
-; GFX7-NEXT: v_mad_u32_u24 v4, v16, v11, v4
-; GFX7-NEXT: v_mad_u32_u24 v4, v8, v13, v4
-; GFX7-NEXT: v_mad_u32_u24 v0, v3, v0, v4
-; GFX7-NEXT: v_mad_u32_u24 v0, v1, v2, v0
-; GFX7-NEXT: v_mad_u32_u24 v0, v15, v10, v0
-; GFX7-NEXT: v_mad_u32_u24 v0, v7, v12, v0
-; GFX7-NEXT: v_mad_u32_u24 v0, v9, v14, v0
+; GFX7-NEXT: v_mad_u32_u24 v0, v6, v11, v0
+; GFX7-NEXT: v_mad_u32_u24 v0, v8, v13, v0
; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX7-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
-; GFX7-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64
+; GFX7-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64
; GFX7-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
; GFX7-NEXT: s_movk_i32 s4, 0xff
; GFX7-NEXT: s_mov_b32 s2, -1
-; GFX7-NEXT: v_mov_b32_e32 v2, 0xff
; GFX7-NEXT: s_mov_b32 s5, 0xffff
-; GFX7-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX7-NEXT: s_addc_u32 s13, s13, 0
; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_bfe_i32 v6, v4, 20, 4
-; GFX7-NEXT: v_bfe_i32 v7, v4, 16, 4
-; GFX7-NEXT: v_bfe_i32 v8, v4, 12, 4
-; GFX7-NEXT: v_bfe_i32 v9, v4, 8, 4
-; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v6
-; GFX7-NEXT: v_and_b32_e32 v7, s4, v7
-; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8
-; GFX7-NEXT: v_and_b32_e32 v9, s4, v9
+; GFX7-NEXT: v_ashrrev_i32_e32 v1, 28, v2
+; GFX7-NEXT: v_bfe_i32 v3, v2, 24, 4
+; GFX7-NEXT: v_lshlrev_b32_e32 v9, 8, v1
+; GFX7-NEXT: v_and_b32_e32 v3, s4, v3
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_bfe_i32 v13, v0, 24, 4
-; GFX7-NEXT: v_bfe_i32 v16, v0, 12, 4
-; GFX7-NEXT: v_or_b32_e32 v6, v7, v6
-; GFX7-NEXT: v_or_b32_e32 v7, v9, v8
-; GFX7-NEXT: v_and_b32_e32 v9, v2, v13
+; GFX7-NEXT: v_bfe_i32 v13, v0, 16, 4
+; GFX7-NEXT: v_bfe_i32 v16, v0, 4, 4
+; GFX7-NEXT: v_or_b32_e32 v3, v3, v9
+; GFX7-NEXT: v_and_b32_e32 v9, s4, v13
; GFX7-NEXT: v_lshlrev_b32_e32 v13, 8, v16
; GFX7-NEXT: buffer_load_ubyte v16, off, s[0:3], 0
-; GFX7-NEXT: v_ashrrev_i32_e32 v1, 28, v4
-; GFX7-NEXT: v_bfe_i32 v5, v4, 24, 4
-; GFX7-NEXT: v_bfe_i32 v10, v4, 4, 4
-; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 4
-; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v1
+; GFX7-NEXT: v_bfe_i32 v4, v2, 20, 4
+; GFX7-NEXT: v_bfe_i32 v5, v2, 16, 4
+; GFX7-NEXT: v_bfe_i32 v6, v2, 12, 4
+; GFX7-NEXT: v_bfe_i32 v7, v2, 8, 4
+; GFX7-NEXT: v_bfe_i32 v8, v2, 4, 4
+; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 4
+; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; GFX7-NEXT: v_and_b32_e32 v5, s4, v5
-; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v10
-; GFX7-NEXT: v_and_b32_e32 v4, v2, v4
-; GFX7-NEXT: v_ashrrev_i32_e32 v12, 28, v0
-; GFX7-NEXT: v_bfe_i32 v14, v0, 20, 4
-; GFX7-NEXT: v_bfe_i32 v15, v0, 16, 4
-; GFX7-NEXT: v_bfe_i32 v17, v0, 8, 4
-; GFX7-NEXT: v_bfe_i32 v18, v0, 4, 4
+; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v6
+; GFX7-NEXT: v_and_b32_e32 v7, s4, v7
+; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v8
+; GFX7-NEXT: v_and_b32_e32 v2, s4, v2
+; GFX7-NEXT: v_ashrrev_i32_e32 v10, 28, v0
+; GFX7-NEXT: v_bfe_i32 v11, v0, 24, 4
+; GFX7-NEXT: v_bfe_i32 v12, v0, 20, 4
+; GFX7-NEXT: v_bfe_i32 v14, v0, 12, 4
+; GFX7-NEXT: v_bfe_i32 v15, v0, 8, 4
; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 4
-; GFX7-NEXT: v_or_b32_e32 v5, v5, v11
-; GFX7-NEXT: v_or_b32_e32 v4, v4, v10
+; GFX7-NEXT: v_or_b32_e32 v4, v5, v4
+; GFX7-NEXT: v_or_b32_e32 v5, v7, v6
+; GFX7-NEXT: v_or_b32_e32 v2, v2, v8
+; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v10
+; GFX7-NEXT: v_and_b32_e32 v7, s4, v11
; GFX7-NEXT: v_lshlrev_b32_e32 v8, 8, v12
-; GFX7-NEXT: v_lshlrev_b32_e32 v10, 8, v14
-; GFX7-NEXT: v_and_b32_e32 v11, v2, v15
-; GFX7-NEXT: v_and_b32_e32 v14, v2, v17
-; GFX7-NEXT: v_lshlrev_b32_e32 v15, 8, v18
-; GFX7-NEXT: v_and_b32_e32 v0, v2, v0
-; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
-; GFX7-NEXT: v_and_b32_e32 v6, s5, v6
-; GFX7-NEXT: v_or_b32_e32 v8, v9, v8
-; GFX7-NEXT: v_or_b32_e32 v9, v11, v10
-; GFX7-NEXT: v_or_b32_e32 v10, v14, v13
-; GFX7-NEXT: v_or_b32_e32 v0, v0, v15
-; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7
+; GFX7-NEXT: v_lshlrev_b32_e32 v11, 8, v14
+; GFX7-NEXT: v_and_b32_e32 v12, s4, v15
+; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_and_b32_e32 v4, s5, v4
-; GFX7-NEXT: v_or_b32_e32 v5, v6, v5
+; GFX7-NEXT: v_or_b32_e32 v6, v7, v6
+; GFX7-NEXT: v_or_b32_e32 v7, v9, v8
+; GFX7-NEXT: v_or_b32_e32 v8, v12, v11
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v13
+; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5
+; GFX7-NEXT: v_and_b32_e32 v2, s5, v2
+; GFX7-NEXT: v_or_b32_e32 v3, v4, v3
+; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v6
; GFX7-NEXT: v_lshlrev_b32_e32 v6, 16, v8
-; GFX7-NEXT: v_lshlrev_b32_e32 v8, 16, v10
-; GFX7-NEXT: v_and_b32_e32 v0, v3, v0
-; GFX7-NEXT: v_or_b32_e32 v4, v4, v7
-; GFX7-NEXT: v_and_b32_e32 v7, v3, v9
-; GFX7-NEXT: v_or_b32_e32 v0, v0, v8
-; GFX7-NEXT: v_or_b32_e32 v3, v7, v6
-; GFX7-NEXT: v_and_b32_e32 v7, v2, v4
-; GFX7-NEXT: v_and_b32_e32 v13, v2, v0
-; GFX7-NEXT: v_bfe_u32 v8, v4, 8, 8
-; GFX7-NEXT: v_bfe_u32 v14, v0, 8, 8
-; GFX7-NEXT: v_lshrrev_b32_e32 v6, 24, v4
-; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8
+; GFX7-NEXT: v_and_b32_e32 v0, s5, v0
+; GFX7-NEXT: v_or_b32_e32 v2, v2, v5
+; GFX7-NEXT: v_or_b32_e32 v0, v0, v6
+; GFX7-NEXT: v_and_b32_e32 v6, s4, v2
+; GFX7-NEXT: v_and_b32_e32 v12, s4, v0
+; GFX7-NEXT: v_and_b32_e32 v5, s5, v7
+; GFX7-NEXT: v_bfe_u32 v7, v2, 8, 8
+; GFX7-NEXT: v_bfe_u32 v13, v0, 8, 8
+; GFX7-NEXT: v_or_b32_e32 v4, v5, v4
+; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v2
+; GFX7-NEXT: v_bfe_u32 v2, v2, 16, 8
; GFX7-NEXT: v_lshrrev_b32_e32 v11, 24, v0
; GFX7-NEXT: v_bfe_u32 v0, v0, 16, 8
-; GFX7-NEXT: v_and_b32_e32 v1, v2, v1
-; GFX7-NEXT: v_and_b32_e32 v12, v2, v12
-; GFX7-NEXT: v_and_b32_e32 v9, v2, v5
-; GFX7-NEXT: v_and_b32_e32 v2, v2, v3
-; GFX7-NEXT: v_bfe_u32 v10, v5, 8, 8
-; GFX7-NEXT: v_bfe_u32 v15, v3, 8, 8
-; GFX7-NEXT: v_bfe_u32 v5, v5, 16, 8
+; GFX7-NEXT: v_and_b32_e32 v8, s4, v3
+; GFX7-NEXT: v_and_b32_e32 v14, s4, v4
+; GFX7-NEXT: v_bfe_u32 v9, v3, 8, 8
+; GFX7-NEXT: v_bfe_u32 v15, v4, 8, 8
; GFX7-NEXT: v_bfe_u32 v3, v3, 16, 8
+; GFX7-NEXT: v_bfe_u32 v4, v4, 16, 8
+; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
+; GFX7-NEXT: v_and_b32_e32 v10, s4, v10
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mad_u32_u24 v7, v7, v13, v16
-; GFX7-NEXT: v_mad_u32_u24 v7, v8, v14, v7
-; GFX7-NEXT: v_mad_u32_u24 v0, v4, v0, v7
-; GFX7-NEXT: v_mad_u32_u24 v0, v6, v11, v0
-; GFX7-NEXT: v_mad_u32_u24 v0, v9, v2, v0
-; GFX7-NEXT: v_mad_u32_u24 v0, v10, v15, v0
-; GFX7-NEXT: v_mad_u32_u24 v0, v5, v3, v0
-; GFX7-NEXT: v_mad_u32_u24 v0, v1, v12, v0
+; GFX7-NEXT: v_mad_u32_u24 v6, v6, v12, v16
+; GFX7-NEXT: v_mad_u32_u24 v6, v7, v13, v6
+; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v6
+; GFX7-NEXT: v_mad_u32_u24 v0, v5, v11, v0
+; GFX7-NEXT: v_mad_u32_u24 v0, v8, v14, v0
+; GFX7-NEXT: v_mad_u32_u24 v0, v9, v15, v0
+; GFX7-NEXT: v_mad_u32_u24 v0, v3, v4, v0
+; GFX7-NEXT: v_mad_u32_u24 v0, v1, v10, v0
; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: buffer_load_ubyte v16, off, s[0:3], 0
; GFX7-NEXT: s_movk_i32 s4, 0xf00
-; GFX7-NEXT: v_mov_b32_e32 v3, 0xf00
; GFX7-NEXT: s_movk_i32 s5, 0xf0f
; GFX7-NEXT: s_addc_u32 s13, s13, 0
; GFX7-NEXT: s_waitcnt vmcnt(2)
-; GFX7-NEXT: v_lshrrev_b32_e32 v6, 28, v2
-; GFX7-NEXT: v_lshlrev_b32_e32 v9, 4, v2
+; GFX7-NEXT: v_lshrrev_b32_e32 v3, 4, v2
+; GFX7-NEXT: v_lshrrev_b32_e32 v7, 12, v2
; GFX7-NEXT: v_bfe_u32 v1, v2, 8, 4
-; GFX7-NEXT: v_lshrrev_b32_e32 v4, 4, v2
-; GFX7-NEXT: v_and_b32_e32 v5, 15, v2
-; GFX7-NEXT: v_bfe_u32 v7, v2, 16, 4
-; GFX7-NEXT: v_lshrrev_b32_e32 v8, 12, v2
+; GFX7-NEXT: v_lshrrev_b32_e32 v5, 28, v2
+; GFX7-NEXT: v_bfe_u32 v6, v2, 16, 4
+; GFX7-NEXT: v_lshlrev_b32_e32 v8, 4, v2
; GFX7-NEXT: s_waitcnt vmcnt(1)
-; GFX7-NEXT: v_lshrrev_b32_e32 v11, 4, v0
-; GFX7-NEXT: v_alignbit_b32 v2, v6, v2, 24
-; GFX7-NEXT: v_and_b32_e32 v6, s4, v9
-; GFX7-NEXT: v_lshlrev_b32_e32 v9, 4, v0
-; GFX7-NEXT: v_bfe_u32 v10, v0, 8, 4
-; GFX7-NEXT: v_and_b32_e32 v4, s4, v4
-; GFX7-NEXT: v_or_b32_e32 v5, v5, v6
-; GFX7-NEXT: v_and_b32_e32 v6, v3, v9
-; GFX7-NEXT: v_and_b32_e32 v3, v3, v11
-; GFX7-NEXT: v_and_b32_e32 v12, 15, v0
-; GFX7-NEXT: v_or_b32_e32 v1, v1, v4
-; GFX7-NEXT: v_or_b32_e32 v3, v10, v3
-; GFX7-NEXT: v_lshrrev_b32_e32 v13, 28, v0
+; GFX7-NEXT: v_lshrrev_b32_e32 v10, 4, v0
+; GFX7-NEXT: v_lshrrev_b32_e32 v12, 28, v0
+; GFX7-NEXT: v_and_b32_e32 v7, s4, v7
+; GFX7-NEXT: v_and_b32_e32 v3, s4, v3
+; GFX7-NEXT: v_and_b32_e32 v4, 15, v2
+; GFX7-NEXT: v_bfe_u32 v9, v0, 8, 4
+; GFX7-NEXT: v_and_b32_e32 v11, 15, v0
+; GFX7-NEXT: v_bfe_u32 v13, v0, 16, 4
+; GFX7-NEXT: v_lshrrev_b32_e32 v14, 12, v0
+; GFX7-NEXT: v_alignbit_b32 v2, v5, v2, 24
+; GFX7-NEXT: v_and_b32_e32 v5, s4, v8
+; GFX7-NEXT: v_lshlrev_b32_e32 v8, 4, v0
+; GFX7-NEXT: v_or_b32_e32 v6, v6, v7
+; GFX7-NEXT: v_or_b32_e32 v1, v1, v3
+; GFX7-NEXT: v_alignbit_b32 v0, v12, v0, 24
+; GFX7-NEXT: v_and_b32_e32 v7, s4, v10
+; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
+; GFX7-NEXT: v_and_b32_e32 v3, s4, v14
+; GFX7-NEXT: v_and_b32_e32 v5, s4, v8
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX7-NEXT: v_or_b32_e32 v6, v12, v6
-; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX7-NEXT: v_bfe_u32 v14, v0, 16, 4
-; GFX7-NEXT: v_lshrrev_b32_e32 v15, 12, v0
-; GFX7-NEXT: v_and_b32_e32 v8, s4, v8
+; GFX7-NEXT: v_and_b32_e32 v0, s5, v0
+; GFX7-NEXT: v_or_b32_e32 v7, v9, v7
; GFX7-NEXT: v_and_b32_e32 v2, s5, v2
-; GFX7-NEXT: v_alignbit_b32 v0, v13, v0, 24
-; GFX7-NEXT: v_or_b32_e32 v1, v5, v1
-; GFX7-NEXT: v_or_b32_e32 v3, v6, v3
-; GFX7-NEXT: v_or_b32_e32 v7, v7, v8
-; GFX7-NEXT: v_and_b32_e32 v4, s4, v15
+; GFX7-NEXT: v_or_b32_e32 v3, v13, v3
+; GFX7-NEXT: v_or_b32_e32 v5, v11, v5
+; GFX7-NEXT: v_or_b32_e32 v1, v4, v1
+; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v7
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX7-NEXT: v_and_b32_e32 v0, s5, v0
+; GFX7-NEXT: v_or_b32_e32 v0, v3, v0
+; GFX7-NEXT: v_or_b32_e32 v3, v5, v4
+; GFX7-NEXT: v_or_b32_e32 v2, v6, v2
; GFX7-NEXT: v_and_b32_e32 v6, 15, v1
; GFX7-NEXT: v_and_b32_e32 v12, 15, v3
-; GFX7-NEXT: v_or_b32_e32 v4, v14, v4
-; GFX7-NEXT: v_or_b32_e32 v2, v7, v2
-; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT: v_bfe_u32 v7, v1, 8, 4
; GFX7-NEXT: v_bfe_u32 v13, v3, 8, 4
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_mad_u32_u24 v6, v6, v12, v16
-; GFX7-NEXT: v_or_b32_e32 v0, v4, v0
; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v1
; GFX7-NEXT: v_bfe_u32 v1, v1, 16, 4
; GFX7-NEXT: v_lshrrev_b32_e32 v10, 24, v3
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=PRE-GFX8 %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=GFX8 %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s
; CHECK-LABEL: {{^}}inline_asm:
; CHECK: s_endpgm
; CHECK: ; def v0
; CHECK: v_mov_b32_e32 v1, v0
; CHECK: ; def v0
-; PRE-GFX8: v_lshl_b32_e32 v{{[0-9]+}}, v1, v0
-; GFX8: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1
+; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1
define amdgpu_kernel void @muliple_def_phys_vgpr() {
entry:
%def0 = call i32 asm sideeffect "; def $0 ", "={v0}"()
; VI-NEXT: v_mov_b32_e32 v1, 0x3e70000
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: flat_store_dword v[2:3], v0
; VI-NEXT: s_endpgm
;
; VI-NEXT: v_mov_b32_e32 v1, 0xfff10000
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: flat_store_dword v[2:3], v0
; VI-NEXT: s_endpgm
;
; VI-NEXT: v_mov_b32_e32 v1, 0x45000000
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: flat_store_dword v[2:3], v0
; VI-NEXT: s_endpgm
;
; VI-NEXT: v_mov_b32_e32 v1, 0x230000
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: flat_store_dword v[2:3], v0
; VI-NEXT: s_endpgm
;
; VI-NEXT: v_mov_b32_e32 v4, s0
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
;
; VI-NEXT: v_mov_b32_e32 v4, s0
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: s_waitcnt vmcnt(0)
-; VI-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; VI-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
;
; SI-NEXT: v_cvt_i32_f32_e32 v1, v1
; SI-NEXT: s_mov_b64 s[2:3], exec
; SI-NEXT: v_or_b32_e32 v0, v0, v1
-; SI-NEXT: v_and_b32_e32 v1, 1, v0
; SI-NEXT: v_and_b32_e32 v0, 1, v0
-; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], s[0:1]
; SI-NEXT: s_xor_b64 s[0:1], exec, s[4:5]
; GFX9-NEXT: v_cvt_i32_f32_e32 v1, v1
; GFX9-NEXT: s_mov_b64 s[2:3], exec
; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX9-NEXT: v_and_b32_e32 v1, 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[4:5]
; GFX10-32-NEXT: v_cvt_i32_f32_e32 v1, v1
; GFX10-32-NEXT: s_mov_b32 s1, exec_lo
; GFX10-32-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX10-32-NEXT: v_and_b32_e32 v1, 1, v0
; GFX10-32-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX10-32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-32-NEXT: v_cmp_eq_u32_e64 s0, 1, v0
; GFX10-32-NEXT: s_and_saveexec_b32 s2, s0
; GFX10-32-NEXT: s_xor_b32 s0, exec_lo, s2
; GFX10-64-NEXT: v_cvt_i32_f32_e32 v1, v1
; GFX10-64-NEXT: s_mov_b64 s[2:3], exec
; GFX10-64-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX10-64-NEXT: v_and_b32_e32 v1, 1, v0
; GFX10-64-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX10-64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX10-64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX10-64-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v0
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], s[0:1]
; GFX10-64-NEXT: s_xor_b64 s[0:1], exec, s[4:5]
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; CI-NEXT: v_lshr_b32_e32 v2, v2, v3
-; CI-NEXT: v_lshr_b32_e32 v3, v4, v5
+; CI-NEXT: v_lshrrev_b32_e32 v2, v3, v2
+; CI-NEXT: v_lshrrev_b32_e32 v3, v5, v4
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; CI-NEXT: v_or_b32_e32 v2, v2, v3
; CI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; CI-NEXT: v_lshrrev_b32_e32 v9, 16, v5
-; CI-NEXT: v_lshr_b32_e32 v3, v3, v5
-; CI-NEXT: v_lshr_b32_e32 v5, v7, v9
-; CI-NEXT: v_lshr_b32_e32 v2, v2, v4
-; CI-NEXT: v_lshr_b32_e32 v4, v6, v8
+; CI-NEXT: v_lshrrev_b32_e32 v3, v5, v3
+; CI-NEXT: v_lshrrev_b32_e32 v5, v9, v7
+; CI-NEXT: v_lshrrev_b32_e32 v2, v4, v2
+; CI-NEXT: v_lshrrev_b32_e32 v4, v8, v6
; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; CI-NEXT: v_or_b32_e32 v3, v3, v5
;
; TODO: Why is the constant not peepholed into the v_or_b32_e32?
;
-; NOSDWA: s_mov_b32 [[CONST:s[0-9]+]], 0x10000
-; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, s0,
+; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, 0x10000,
; SDWA: v_or_b32_e32 v{{[0-9]+}}, 0x10000,
define amdgpu_kernel void @sdwa_crash_inlineasm_def() #0 {
bb:
; CHECK-NEXT: v_bfe_i32 v0, v0, 0, 8
; CHECK-NEXT: v_mov_b32_e32 v1, 0x54
; CHECK-NEXT: v_ashrrev_i16 v0, 7, v0
-; CHECK-NEXT: v_xor_b32_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; CHECK-NEXT: v_xor_b32_sdwa v0, sext(v0), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; CHECK-NEXT: s_setpc_b64 s[30:31]
%c = icmp sgt i8 %a, -1
%s = select i1 %c, i32 84, i32 -85
; GCN: {{buffer|flat|global}}_load_ushort [[VAL0:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_ushort [[VAL1:v[0-9]+]]
-; SI: v_lshl_b32_e32 [[REG:v[0-9]+]], [[VAL0]], [[VAL1]]
+; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]]
; GFX89: v_lshlrev_b16_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]]
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}}
; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: v_add_i32_e32 v0, vcc, 3, v0
-; SI-NEXT: v_lshl_b32_e32 v0, v2, v0
+; SI-NEXT: v_lshlrev_b32_e32 v0, v0, v2
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; SI-NEXT: v_lshl_b32_e32 v0, v2, v0
-; SI-NEXT: v_lshl_b32_e32 v1, v1, v3
+; SI-NEXT: v_lshlrev_b32_e32 v0, v0, v2
+; SI-NEXT: v_lshlrev_b32_e32 v1, v3, v1
; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT: v_or_b32_e32 v0, v0, v1
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v5
-; SI-NEXT: v_lshl_b32_e32 v3, v3, v5
-; SI-NEXT: v_lshl_b32_e32 v2, v2, v4
-; SI-NEXT: v_lshl_b32_e32 v4, v7, v9
-; SI-NEXT: v_lshl_b32_e32 v5, v6, v8
+; SI-NEXT: v_lshlrev_b32_e32 v3, v5, v3
+; SI-NEXT: v_lshlrev_b32_e32 v2, v4, v2
+; SI-NEXT: v_lshlrev_b32_e32 v4, v9, v7
+; SI-NEXT: v_lshlrev_b32_e32 v5, v8, v6
; SI-NEXT: v_and_b32_e32 v3, s0, v3
; SI-NEXT: v_and_b32_e32 v2, s0, v2
; SI-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; CI-NEXT: v_lshl_b32_e32 v2, v2, v3
-; CI-NEXT: v_lshl_b32_e32 v3, v4, v5
+; CI-NEXT: v_lshlrev_b32_e32 v2, v3, v2
+; CI-NEXT: v_lshlrev_b32_e32 v3, v5, v4
; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; CI-NEXT: v_or_b32_e32 v2, v2, v3
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; CI-NEXT: v_lshrrev_b32_e32 v9, 16, v5
-; CI-NEXT: v_lshl_b32_e32 v3, v3, v5
-; CI-NEXT: v_lshl_b32_e32 v2, v2, v4
-; CI-NEXT: v_lshl_b32_e32 v4, v7, v9
-; CI-NEXT: v_lshl_b32_e32 v5, v6, v8
+; CI-NEXT: v_lshlrev_b32_e32 v3, v5, v3
+; CI-NEXT: v_lshlrev_b32_e32 v2, v4, v2
+; CI-NEXT: v_lshlrev_b32_e32 v4, v9, v7
+; CI-NEXT: v_lshlrev_b32_e32 v5, v8, v6
; CI-NEXT: v_and_b32_e32 v3, s0, v3
; CI-NEXT: v_and_b32_e32 v2, s0, v2
; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v4
-; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
+; GFX6-NEXT: v_xor_b32_e32 v3, s6, v3
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v4
-; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3
+; GFX8-NEXT: v_xor_b32_e32 v3, s6, v3
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v3, v11
-; GFX6-NEXT: v_bfrev_b32_e32 v16, 1
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v8
-; GFX6-NEXT: v_xor_b32_e32 v3, v16, v3
+; GFX6-NEXT: v_xor_b32_e32 v3, s6, v3
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v4, v12
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4
; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v8
-; GFX6-NEXT: v_xor_b32_e32 v4, v16, v4
+; GFX6-NEXT: v_xor_b32_e32 v4, s6, v4
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v5, v13
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5
; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v8
-; GFX6-NEXT: v_xor_b32_e32 v5, v16, v5
+; GFX6-NEXT: v_xor_b32_e32 v5, s6, v5
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v6, v14
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6
; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v8
-; GFX6-NEXT: v_xor_b32_e32 v6, v16, v6
+; GFX6-NEXT: v_xor_b32_e32 v6, s6, v6
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v7, v15
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7
; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v8
-; GFX6-NEXT: v_xor_b32_e32 v7, v16, v7
+; GFX6-NEXT: v_xor_b32_e32 v7, s6, v7
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v3, v11
-; GFX8-NEXT: v_bfrev_b32_e32 v16, 1
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v8
-; GFX8-NEXT: v_xor_b32_e32 v3, v16, v3
+; GFX8-NEXT: v_xor_b32_e32 v3, s6, v3
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v4, v12
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4
; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v8
-; GFX8-NEXT: v_xor_b32_e32 v4, v16, v4
+; GFX8-NEXT: v_xor_b32_e32 v4, s6, v4
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v5, v13
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5
; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v8
-; GFX8-NEXT: v_xor_b32_e32 v5, v16, v5
+; GFX8-NEXT: v_xor_b32_e32 v5, s6, v5
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v6, v14
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6
; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v8
-; GFX8-NEXT: v_xor_b32_e32 v6, v16, v6
+; GFX8-NEXT: v_xor_b32_e32 v6, s6, v6
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v7, v15
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7
; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v8
-; GFX8-NEXT: v_xor_b32_e32 v7, v16, v7
+; GFX8-NEXT: v_xor_b32_e32 v7, s6, v7
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v3, v19
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3
-; GFX6-NEXT: v_bfrev_b32_e32 v17, 1
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v3, v17, v3
+; GFX6-NEXT: v_xor_b32_e32 v3, s6, v3
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v4, v20
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4
; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v4, v17, v4
+; GFX6-NEXT: v_xor_b32_e32 v4, s6, v4
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v5, v21
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v5
; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v5, v17, v5
+; GFX6-NEXT: v_xor_b32_e32 v5, s6, v5
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v6, v22
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v6
; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v6, v17, v6
+; GFX6-NEXT: v_xor_b32_e32 v6, s6, v6
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v7, v23
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v7
; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v7, v17, v7
+; GFX6-NEXT: v_xor_b32_e32 v7, s6, v7
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v8, v24
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v8
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v8, v17, v8
+; GFX6-NEXT: v_xor_b32_e32 v8, s6, v8
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v9, v25
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v9
; GFX6-NEXT: v_ashrrev_i32_e32 v9, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v9, v17, v9
+; GFX6-NEXT: v_xor_b32_e32 v9, s6, v9
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v10, v26
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v10
; GFX6-NEXT: v_ashrrev_i32_e32 v10, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v10, v17, v10
+; GFX6-NEXT: v_xor_b32_e32 v10, s6, v10
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v11, v27
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v11
; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v11, v17, v11
+; GFX6-NEXT: v_xor_b32_e32 v11, s6, v11
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v12, v28
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v12
; GFX6-NEXT: v_ashrrev_i32_e32 v12, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v12, v17, v12
+; GFX6-NEXT: v_xor_b32_e32 v12, s6, v12
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v13, v29
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v13
; GFX6-NEXT: v_ashrrev_i32_e32 v13, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v13, v17, v13
+; GFX6-NEXT: v_xor_b32_e32 v13, s6, v13
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v14, v30
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v14
; GFX6-NEXT: v_ashrrev_i32_e32 v14, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v14, v17, v14
+; GFX6-NEXT: v_xor_b32_e32 v14, s6, v14
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v15, v31
; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v31
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15
; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v16
-; GFX6-NEXT: v_xor_b32_e32 v15, v17, v15
+; GFX6-NEXT: v_xor_b32_e32 v15, s6, v15
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v3, v19
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3
-; GFX8-NEXT: v_bfrev_b32_e32 v17, 1
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v3, v17, v3
+; GFX8-NEXT: v_xor_b32_e32 v3, s6, v3
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v4, v20
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4
; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v4, v17, v4
+; GFX8-NEXT: v_xor_b32_e32 v4, s6, v4
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v5, v21
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v5
; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v5, v17, v5
+; GFX8-NEXT: v_xor_b32_e32 v5, s6, v5
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v5, v16, v5, vcc
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v6, v22
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v6
; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v6, v17, v6
+; GFX8-NEXT: v_xor_b32_e32 v6, s6, v6
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v7, v23
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v7
; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v7, v17, v7
+; GFX8-NEXT: v_xor_b32_e32 v7, s6, v7
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v7, v16, v7, vcc
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v8, v24
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v8
; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v8, v17, v8
+; GFX8-NEXT: v_xor_b32_e32 v8, s6, v8
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v8, v16, v8, vcc
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v9, v25
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v9
; GFX8-NEXT: v_ashrrev_i32_e32 v9, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v9, v17, v9
+; GFX8-NEXT: v_xor_b32_e32 v9, s6, v9
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v9, v16, v9, vcc
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v10, v26
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v10
; GFX8-NEXT: v_ashrrev_i32_e32 v10, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v10, v17, v10
+; GFX8-NEXT: v_xor_b32_e32 v10, s6, v10
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v10, v16, v10, vcc
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v11, v27
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v11
; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v11, v17, v11
+; GFX8-NEXT: v_xor_b32_e32 v11, s6, v11
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v11, v16, v11, vcc
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v12, v28
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v12
; GFX8-NEXT: v_ashrrev_i32_e32 v12, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v12, v17, v12
+; GFX8-NEXT: v_xor_b32_e32 v12, s6, v12
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v12, v16, v12, vcc
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v13, v29
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v13
; GFX8-NEXT: v_ashrrev_i32_e32 v13, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v13, v17, v13
+; GFX8-NEXT: v_xor_b32_e32 v13, s6, v13
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v14, v30
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v14
; GFX8-NEXT: v_ashrrev_i32_e32 v14, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v14, v17, v14
+; GFX8-NEXT: v_xor_b32_e32 v14, s6, v14
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v14, v16, v14, vcc
; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v15, v31
; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v31
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15
; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v16
-; GFX8-NEXT: v_xor_b32_e32 v15, v17, v15
+; GFX8-NEXT: v_xor_b32_e32 v15, s6, v15
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
-; GCN: s_addk_i32 s32, 0x2800{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
+; GCN: s_addk_i32 s32, 0x2800{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
-; GCN: s_addk_i32 s32, 0x3000{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
+; GCN: s_addk_i32 s32, 0x3000{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GFX9-O0-NEXT: s_mov_b32 s35, 1
; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s35, v3
; GFX9-O0-NEXT: s_mov_b32 s35, 2
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s35
-; GFX9-O0-NEXT: v_and_b32_e32 v3, v3, v4
+; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s35
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[36:39], s34 offset:4
; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b32 s34, 1
; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s34, v0
; GFX9-O0-NEXT: s_mov_b32 s34, 2
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s34
-; GFX9-O0-NEXT: v_and_b32_e32 v0, v0, v3
+; GFX9-O0-NEXT: v_and_b32_e64 v0, v0, s34
; GFX9-O0-NEXT: s_mov_b32 s34, 0
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4
; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1