// FIXME: We should choose either a zext or a sext based on other constants
// already around.
def : Pat<(i32 (anyext i1:$in)),
- (SELECT_I4 crbitrc:$in, (LI 1), (LI 0))>;
+ (SELECT_I4 $in, (LI 1), (LI 0))>;
def : Pat<(i64 (anyext i1:$in)),
- (SELECT_I8 crbitrc:$in, (LI8 1), (LI8 0))>;
+ (SELECT_I8 $in, (LI8 1), (LI8 0))>;
// match setcc on i1 variables.
// CRANDC is:
multiclass FSetCCPat<SDPatternOperator SetCC, ValueType Ty, I FCmp> {
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_lt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_lt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_gt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_gt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_eq)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_eq)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_un)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOLT)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_lt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLT)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_lt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOGT)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_gt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGT)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_gt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOEQ)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_eq)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETEQ)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_eq)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUO)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_un)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
}
let Predicates = [HasFPU] in {
define <1 x i64> @fcvtzs_1d_intrinsic(<1 x double> %A) nounwind {
;CHECK-LABEL: fcvtzs_1d_intrinsic:
;CHECK-NOT: ld1
-;CHECK: fcvtzs d0, d0
-;CHECK-NEXT: ret
+;CHECK: fcvtzs{{.*}}, d0
+;CHECK: ret
%tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %A)
ret <1 x i64> %tmp3
}
define <1 x i64> @fcvtzu_1d_intrinsic(<1 x double> %A) nounwind {
;CHECK-LABEL: fcvtzu_1d_intrinsic:
;CHECK-NOT: ld1
-;CHECK: fcvtzu d0, d0
-;CHECK-NEXT: ret
+;CHECK: fcvtzu{{.*}}, d0
+;CHECK: ret
%tmp3 = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %A)
ret <1 x i64> %tmp3
}
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v6, v[0:1], off
-; GFX9-NEXT: global_load_ushort v7, v[0:1], off offset:2
-; GFX9-NEXT: global_load_ushort v8, v[0:1], off offset:4
-; GFX9-NEXT: global_load_ushort v9, v[2:3], off
-; GFX9-NEXT: global_load_ushort v10, v[2:3], off offset:2
-; GFX9-NEXT: global_load_ushort v11, v[2:3], off offset:4
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: s_waitcnt vmcnt(4)
-; GFX9-NEXT: v_perm_b32 v0, v7, v6, s4
+; GFX9-NEXT: global_load_ushort v7, v[0:1], off offset:4
+; GFX9-NEXT: global_load_ushort v8, v[2:3], off
+; GFX9-NEXT: global_load_ushort v9, v[2:3], off offset:4
+; GFX9-NEXT: global_load_ushort v10, v[0:1], off offset:2
+; GFX9-NEXT: global_load_ushort v11, v[2:3], off offset:2
+; GFX9-NEXT: s_waitcnt vmcnt(5)
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v6
+; GFX9-NEXT: s_waitcnt vmcnt(3)
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v8
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: v_pk_add_u16 v2, v7, v9
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: v_perm_b32 v1, v10, v9, s4
-; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX9-NEXT: v_lshl_or_b32 v0, v10, 16, v0
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v2, v8, v11
+; GFX9-NEXT: v_lshl_or_b32 v1, v11, 16, v1
+; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
; GFX9-NEXT: global_store_short v[4:5], v0, off
; GFX9-NEXT: global_store_short_d16_hi v[4:5], v0, off offset:2
; GFX9-NEXT: global_store_short v[4:5], v2, off offset:4
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v6, v[0:1], off
-; GFX9-NEXT: global_load_ushort v7, v[0:1], off offset:2
-; GFX9-NEXT: global_load_ushort v8, v[0:1], off offset:4
-; GFX9-NEXT: global_load_ushort v9, v[0:1], off offset:6
-; GFX9-NEXT: global_load_ushort v10, v[0:1], off offset:8
-; GFX9-NEXT: global_load_ushort v11, v[2:3], off
-; GFX9-NEXT: global_load_ushort v12, v[2:3], off offset:2
-; GFX9-NEXT: global_load_ushort v13, v[2:3], off offset:4
-; GFX9-NEXT: global_load_ushort v14, v[2:3], off offset:6
-; GFX9-NEXT: global_load_ushort v15, v[2:3], off offset:8
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: global_load_ushort v7, v[0:1], off offset:4
+; GFX9-NEXT: global_load_ushort v8, v[0:1], off offset:8
+; GFX9-NEXT: global_load_ushort v9, v[2:3], off
+; GFX9-NEXT: global_load_ushort v10, v[2:3], off offset:4
+; GFX9-NEXT: global_load_ushort v11, v[2:3], off offset:8
+; GFX9-NEXT: global_load_ushort v12, v[0:1], off offset:2
+; GFX9-NEXT: global_load_ushort v13, v[0:1], off offset:6
+; GFX9-NEXT: global_load_ushort v14, v[2:3], off offset:2
+; GFX9-NEXT: global_load_ushort v15, v[2:3], off offset:6
+; GFX9-NEXT: s_waitcnt vmcnt(9)
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v6
; GFX9-NEXT: s_waitcnt vmcnt(8)
-; GFX9-NEXT: v_perm_b32 v0, v7, v6, s4
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v7
; GFX9-NEXT: s_waitcnt vmcnt(6)
-; GFX9-NEXT: v_perm_b32 v1, v9, v8, s4
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v9
+; GFX9-NEXT: s_waitcnt vmcnt(5)
+; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v10
+; GFX9-NEXT: s_waitcnt vmcnt(4)
+; GFX9-NEXT: v_pk_add_u16 v6, v8, v11
; GFX9-NEXT: s_waitcnt vmcnt(3)
-; GFX9-NEXT: v_perm_b32 v2, v12, v11, s4
-; GFX9-NEXT: v_pk_add_u16 v0, v0, v2
+; GFX9-NEXT: v_lshl_or_b32 v0, v12, 16, v0
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: v_lshl_or_b32 v1, v13, 16, v1
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: v_perm_b32 v3, v14, v13, s4
+; GFX9-NEXT: v_lshl_or_b32 v2, v14, 16, v2
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v6, v10, v15
+; GFX9-NEXT: v_lshl_or_b32 v3, v15, 16, v3
+; GFX9-NEXT: v_pk_add_u16 v0, v0, v2
; GFX9-NEXT: v_pk_add_u16 v1, v1, v3
; GFX9-NEXT: global_store_short v[4:5], v0, off
; GFX9-NEXT: global_store_short_d16_hi v[4:5], v0, off offset:2
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v6, v[0:1], off
-; GFX9-NEXT: global_load_ushort v7, v[0:1], off offset:2
-; GFX9-NEXT: global_load_ushort v8, v[0:1], off offset:4
-; GFX9-NEXT: global_load_ushort v9, v[0:1], off offset:6
-; GFX9-NEXT: global_load_ushort v10, v[0:1], off offset:8
-; GFX9-NEXT: global_load_ushort v11, v[0:1], off offset:10
-; GFX9-NEXT: global_load_ushort v12, v[0:1], off offset:12
-; GFX9-NEXT: global_load_ushort v13, v[2:3], off
-; GFX9-NEXT: global_load_ushort v14, v[2:3], off offset:2
-; GFX9-NEXT: global_load_ushort v15, v[2:3], off offset:4
-; GFX9-NEXT: global_load_ushort v16, v[2:3], off offset:6
-; GFX9-NEXT: global_load_ushort v17, v[2:3], off offset:8
-; GFX9-NEXT: global_load_ushort v18, v[2:3], off offset:10
-; GFX9-NEXT: global_load_ushort v19, v[2:3], off offset:12
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: global_load_ushort v7, v[0:1], off offset:4
+; GFX9-NEXT: global_load_ushort v8, v[0:1], off offset:8
+; GFX9-NEXT: global_load_ushort v9, v[0:1], off offset:12
+; GFX9-NEXT: global_load_ushort v10, v[2:3], off
+; GFX9-NEXT: global_load_ushort v11, v[2:3], off offset:4
+; GFX9-NEXT: global_load_ushort v12, v[2:3], off offset:8
+; GFX9-NEXT: global_load_ushort v13, v[2:3], off offset:12
+; GFX9-NEXT: global_load_ushort v14, v[0:1], off offset:2
+; GFX9-NEXT: global_load_ushort v15, v[0:1], off offset:6
+; GFX9-NEXT: global_load_ushort v16, v[0:1], off offset:10
+; GFX9-NEXT: global_load_ushort v17, v[2:3], off offset:2
+; GFX9-NEXT: global_load_ushort v18, v[2:3], off offset:6
+; GFX9-NEXT: global_load_ushort v19, v[2:3], off offset:10
+; GFX9-NEXT: s_waitcnt vmcnt(13)
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v6
; GFX9-NEXT: s_waitcnt vmcnt(12)
-; GFX9-NEXT: v_perm_b32 v0, v7, v6, s4
-; GFX9-NEXT: s_waitcnt vmcnt(10)
-; GFX9-NEXT: v_perm_b32 v1, v9, v8, s4
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v7
+; GFX9-NEXT: s_waitcnt vmcnt(11)
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v8
+; GFX9-NEXT: s_waitcnt vmcnt(9)
+; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v10
; GFX9-NEXT: s_waitcnt vmcnt(8)
-; GFX9-NEXT: v_perm_b32 v2, v11, v10, s4
+; GFX9-NEXT: v_and_b32_e32 v6, 0xffff, v11
+; GFX9-NEXT: s_waitcnt vmcnt(7)
+; GFX9-NEXT: v_and_b32_e32 v7, 0xffff, v12
+; GFX9-NEXT: s_waitcnt vmcnt(6)
+; GFX9-NEXT: v_pk_add_u16 v8, v9, v13
; GFX9-NEXT: s_waitcnt vmcnt(5)
-; GFX9-NEXT: v_perm_b32 v3, v14, v13, s4
-; GFX9-NEXT: v_pk_add_u16 v0, v0, v3
+; GFX9-NEXT: v_lshl_or_b32 v0, v14, 16, v0
+; GFX9-NEXT: s_waitcnt vmcnt(4)
+; GFX9-NEXT: v_lshl_or_b32 v1, v15, 16, v1
; GFX9-NEXT: s_waitcnt vmcnt(3)
-; GFX9-NEXT: v_perm_b32 v6, v16, v15, s4
-; GFX9-NEXT: v_pk_add_u16 v1, v1, v6
+; GFX9-NEXT: v_lshl_or_b32 v2, v16, 16, v2
+; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: v_lshl_or_b32 v3, v17, 16, v3
; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: v_perm_b32 v7, v18, v17, s4
+; GFX9-NEXT: v_lshl_or_b32 v6, v18, 16, v6
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v8, v12, v19
+; GFX9-NEXT: v_lshl_or_b32 v7, v19, 16, v7
+; GFX9-NEXT: v_pk_add_u16 v0, v0, v3
+; GFX9-NEXT: v_pk_add_u16 v1, v1, v6
; GFX9-NEXT: v_pk_add_u16 v2, v2, v7
; GFX9-NEXT: global_store_short v[4:5], v0, off
; GFX9-NEXT: global_store_short_d16_hi v[4:5], v0, off offset:2
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
; GFX9-NEXT: global_load_ushort v14, v[0:1], off offset:16
-; GFX9-NEXT: global_load_ushort v15, v[0:1], off offset:18
-; GFX9-NEXT: global_load_ushort v16, v[2:3], off offset:16
-; GFX9-NEXT: global_load_ushort v17, v[2:3], off offset:18
+; GFX9-NEXT: global_load_ushort v15, v[2:3], off offset:16
; GFX9-NEXT: global_load_dwordx4 v[10:13], v[2:3], off
-; GFX9-NEXT: global_load_ushort v18, v[0:1], off offset:20
-; GFX9-NEXT: global_load_ushort v19, v[2:3], off offset:20
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: global_load_ushort v16, v[0:1], off offset:20
+; GFX9-NEXT: global_load_ushort v17, v[2:3], off offset:20
+; GFX9-NEXT: global_load_ushort v18, v[0:1], off offset:18
+; GFX9-NEXT: global_load_ushort v19, v[2:3], off offset:18
+; GFX9-NEXT: s_waitcnt vmcnt(6)
+; GFX9-NEXT: v_and_b32_e32 v14, 0xffff, v14
; GFX9-NEXT: s_waitcnt vmcnt(5)
-; GFX9-NEXT: v_perm_b32 v14, v15, v14, s4
-; GFX9-NEXT: s_waitcnt vmcnt(3)
-; GFX9-NEXT: v_perm_b32 v15, v17, v16, s4
-; GFX9-NEXT: s_waitcnt vmcnt(2)
+; GFX9-NEXT: v_and_b32_e32 v15, 0xffff, v15
+; GFX9-NEXT: s_waitcnt vmcnt(4)
; GFX9-NEXT: v_pk_add_u16 v0, v6, v10
; GFX9-NEXT: v_pk_add_u16 v1, v7, v11
; GFX9-NEXT: v_pk_add_u16 v2, v8, v12
; GFX9-NEXT: v_pk_add_u16 v3, v9, v13
+; GFX9-NEXT: s_waitcnt vmcnt(1)
+; GFX9-NEXT: v_lshl_or_b32 v7, v18, 16, v14
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_add_u16 v6, v18, v19
-; GFX9-NEXT: v_pk_add_u16 v7, v14, v15
+; GFX9-NEXT: v_lshl_or_b32 v8, v19, 16, v15
; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
-; GFX9-NEXT: global_store_short v[4:5], v7, off offset:16
-; GFX9-NEXT: global_store_short_d16_hi v[4:5], v7, off offset:18
+; GFX9-NEXT: v_pk_add_u16 v6, v16, v17
+; GFX9-NEXT: v_pk_add_u16 v0, v7, v8
+; GFX9-NEXT: global_store_short v[4:5], v0, off offset:16
+; GFX9-NEXT: global_store_short_d16_hi v[4:5], v0, off offset:18
; GFX9-NEXT: global_store_short v[4:5], v6, off offset:20
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: v_andn2_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v1, -1, v1
+; GCN-NEXT: v_not_b32_e32 v1, v1
; GCN-NEXT: v_and_b32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%not.src1 = xor i32 %src1, -1
define amdgpu_ps float @v_andn2_i32_sv(i32 inreg %src0, i32 %src1) {
; GCN-LABEL: v_andn2_i32_sv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
+; GCN-NEXT: v_not_b32_e32 v0, v0
; GCN-NEXT: v_and_b32_e32 v0, s2, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i32_sv:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX10PLUS-NEXT: v_not_b32_e32 v0, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s2, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
%not.src1 = xor i32 %src1, -1
; GCN-LABEL: v_andn2_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v2, -1, v2
-; GCN-NEXT: v_xor_b32_e32 v3, -1, v3
+; GCN-NEXT: v_not_b32_e32 v2, v2
+; GCN-NEXT: v_not_b32_e32 v3, v3
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
; GCN-NEXT: v_and_b32_e32 v1, v1, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2
-; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -1, v3
+; GFX10PLUS-NEXT: v_not_b32_e32 v2, v2
+; GFX10PLUS-NEXT: v_not_b32_e32 v3, v3
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
define amdgpu_ps <2 x float> @v_andn2_i64_sv(i64 inreg %src0, i64 %src1) {
; GCN-LABEL: v_andn2_i64_sv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
-; GCN-NEXT: v_xor_b32_e32 v1, -1, v1
+; GCN-NEXT: v_not_b32_e32 v0, v0
+; GCN-NEXT: v_not_b32_e32 v1, v1
; GCN-NEXT: v_and_b32_e32 v0, s2, v0
; GCN-NEXT: v_and_b32_e32 v1, s3, v1
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i64_sv:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX10PLUS-NEXT: v_not_b32_e32 v0, v0
+; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s2, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v1, s3, v1
; GFX10PLUS-NEXT: ; return to shader part epilog
; GCN: ; %bb.0:
; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
; GCN-NEXT: v_and_b32_e32 v0, s2, v0
-; GCN-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i16_sv:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s2, v0
-; GFX10PLUS-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
%not.src1 = xor i16 %src1, -1
%and = and i16 %src0, %not.src1
; GCN: ; %bb.0:
; GCN-NEXT: s_xor_b32 s0, s2, -1
; GCN-NEXT: v_and_b32_e32 v0, s0, v0
-; GCN-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i16_vs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_xor_b32 s0, s2, -1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s0, v0
-; GFX10PLUS-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
%not.src1 = xor i16 %src1, -1
%and = and i16 %src0, %not.src1
; GFX7-NEXT: s_lshl_b32 s2, s1, 8
; GFX7-NEXT: s_bfe_u32 s1, s1, 0x80008
; GFX7-NEXT: s_or_b32 s1, s1, s2
-; GFX7-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX7-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX7-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX7-NEXT: s_and_b32 s0, 0xffff, s0
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: ; return to shader part epilog
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 8, v0
; GFX7-NEXT: v_bfe_u32 v0, v0, 8, 8
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX7-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_bswap_i16_zext_to_i32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
-; SI-NEXT: v_bfe_u32 v0, v0, 0, 16
+; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_test_uitofp_i16_byte_to_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT: v_and_b32_e32 v0, 0xff, v0
-; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
+; VI-NEXT: v_mov_b32_e32 v1, 0xffff
+; VI-NEXT: v_and_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%masked = and i16 %arg0, 255
%itofp = uitofp i16 %masked to float
; GFX8-NEXT: v_mul_f32_e32 v3, v7, v5
; GFX8-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX8-NEXT: v_div_fixup_f16 v0, v2, v1, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_div_fixup_f16 v1, v3, v4, v6
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fdiv_v2f16:
; GFX8-NEXT: v_rcp_f16_e32 v2, v1
; GFX8-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v2
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fdiv_v2f16_afn:
; GFX8-NEXT: v_mul_f32_e32 v3, v7, v5
; GFX8-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX8-NEXT: v_div_fixup_f16 v0, v2, v1, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_div_fixup_f16 v1, v3, v4, v6
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fdiv_v2f16_ulp25:
; GFX8-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX8-NEXT: v_div_fixup_f16 v0, v1, v0, 1.0
; GFX8-NEXT: v_div_fixup_f16 v1, v3, v2, 1.0
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_rcp_v2f16:
; GFX8-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX8-NEXT: v_div_fixup_f16 v0, v1, v0, 1.0
; GFX8-NEXT: v_div_fixup_f16 v1, v3, v2, 1.0
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_rcp_v2f16_arcp:
; GFX8-LABEL: v_rcp_v2f16_arcp_afn:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_rcp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX8-NEXT: v_rcp_f16_e32 v0, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_rcp_f16_e32 v1, v0
+; GFX8-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_rcp_v2f16_arcp_afn:
; GFX8-LABEL: v_rcp_v2f16_ulp25:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_rcp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX8-NEXT: v_rcp_f16_e32 v0, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_rcp_f16_e32 v1, v0
+; GFX8-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_rcp_v2f16_ulp25:
; GFX8-NEXT: v_rcp_f16_e32 v2, v1
; GFX8-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v2
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fdiv_v2f16_afn_ulp25:
; GFX8-NEXT: v_mul_f32_e32 v3, v7, v5
; GFX8-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX8-NEXT: v_div_fixup_f16 v0, v2, v1, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
; GFX8-NEXT: v_div_fixup_f16 v1, v3, v4, v6
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fdiv_v2f16_arcp_ulp25:
; GFX8-NEXT: v_rcp_f16_e32 v2, v1
; GFX8-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v2
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v2f16:
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v2f16_fneg_lhs:
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v2f16_fneg_rhs:
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v2f16_fneg_lhs_rhs:
; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT: v_fma_f16 v2, v6, v7, v8
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v3f16:
; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v5
; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT: v_fma_f16 v2, v6, v8, v10
-; GFX8-NEXT: v_mov_b32_e32 v4, 16
; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
; GFX8-NEXT: v_fma_f16 v3, v7, v9, v11
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v3
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v4f16:
; GFX8-NEXT: v_mov_b32_e32 v2, 0x4000
; GFX8-NEXT: v_max_f16_e32 v1, 2.0, v0
; GFX8-NEXT: v_max_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_min_f16_e32 v0, 4.0, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-NEXT: v_mov_b32_e32 v2, 0x4400
; GFX8-NEXT: v_min_f16_e32 v1, 4.0, v1
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
%maxnum = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>)
%fmed = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> %maxnum, <2 x half> <half 4.0, half 4.0>)
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v2f16:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v2f16_fneg_lhs:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v2f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v2, v0, v1
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v3f16:
; GFX8-NEXT: v_mov_b32_e32 v4, 0x80008000
; GFX8-NEXT: v_xor_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v3f16_fneg_lhs:
; GFX8-NEXT: v_mov_b32_e32 v4, 0x80008000
; GFX8-NEXT: v_xor_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v3f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v3f16_fneg_lhs_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
-; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v4f16:
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
-; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v4f16_fneg_lhs:
; GFX8-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
; GFX8-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
-; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v4f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v4, v0, v2
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v2, v1, v3
-; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
-; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
-; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v5, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v6, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v6f16:
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
-; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
-; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v5, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v6, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v6f16_fneg_lhs:
; GFX8-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
; GFX8-NEXT: v_xor_b32_e32 v5, 0x80008000, v5
; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
-; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
-; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v5, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v6, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v6f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v6, v0, v3
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v3, v1, v4
-; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v2, v5
-; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v5, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v6, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
-; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
-; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
-; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v7, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_mov_b32_e32 v7, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
+; GFX8-NEXT: v_or_b32_e32 v2, v5, v2
+; GFX8-NEXT: v_or_b32_e32 v3, v6, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v8f16:
; GFX8-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
; GFX8-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
-; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
-; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
-; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v7, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_mov_b32_e32 v7, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
+; GFX8-NEXT: v_or_b32_e32 v2, v5, v2
+; GFX8-NEXT: v_or_b32_e32 v3, v6, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v8f16_fneg_lhs:
; GFX8-NEXT: v_xor_b32_e32 v6, 0x80008000, v6
; GFX8-NEXT: v_xor_b32_e32 v7, 0x80008000, v7
; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
-; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
-; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
-; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v7, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_mov_b32_e32 v7, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
+; GFX8-NEXT: v_or_b32_e32 v2, v5, v2
+; GFX8-NEXT: v_or_b32_e32 v3, v6, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v8f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_e32 v8, v0, v4
-; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v4, v1, v5
-; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v5, v2, v6
-; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_mul_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v6, v3, v7
-; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v7, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_mov_b32_e32 v7, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_mul_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
+; GFX8-NEXT: v_or_b32_e32 v2, v5, v2
+; GFX8-NEXT: v_or_b32_e32 v3, v6, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
-; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_exp_f16_e32 v0, v0
+; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_exp_f16_e32 v1, v1
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16:
; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_exp_f16_e32 v1, v1
; GFX9-NEXT: v_exp_f16_e32 v0, v0
-; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
+; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16:
; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX10-NEXT: v_exp_f16_e32 v1, v1
+; GFX10-NEXT: v_exp_f16_e32 v1, v2
; GFX10-NEXT: v_exp_f16_e32 v0, v0
-; GFX10-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: v_log_f16_e32 v0, v0
+; GFX11-NEXT: v_log_f16_e32 v2, v0
+; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_log_f16_e32 v2, v2
+; GFX11-NEXT: v_log_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v0, v0, v1 :: v_dual_mul_dx9_zero_f32 v1, v2, v3
-; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v1, v2, v1 :: v_dual_mul_dx9_zero_f32 v0, v0, v3
; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX11-NEXT: v_exp_f16_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: v_exp_f16_e32 v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_exp_f16_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
ret <2 x half> %pow
; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
-; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_exp_f16_e32 v0, v0
+; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_exp_f16_e32 v1, v1
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_exp_f16_e32 v1, v1
; GFX9-NEXT: v_exp_f16_e32 v0, v0
-; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
+; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX10-NEXT: v_exp_f16_e32 v1, v1
+; GFX10-NEXT: v_exp_f16_e32 v1, v2
; GFX10-NEXT: v_exp_f16_e32 v0, v0
-; GFX10-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_v2f16_fneg_lhs:
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: v_log_f16_e32 v0, v0
+; GFX11-NEXT: v_log_f16_e32 v2, v0
+; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_log_f16_e32 v2, v2
+; GFX11-NEXT: v_log_f16_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v0, v0, v1 :: v_dual_mul_dx9_zero_f32 v1, v2, v3
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v1, v2, v1 :: v_dual_mul_dx9_zero_f32 v0, v0, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_exp_f16_e32 v0, v0
; GFX11-NEXT: v_exp_f16_e32 v1, v1
+; GFX11-NEXT: v_exp_f16_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%x.fneg = fneg <2 x half> %x
%pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
; GFX8-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX8-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
+; GFX8-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_exp_f16_e32 v0, v0
-; GFX8-NEXT: v_exp_f16_e32 v1, v1
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_exp_f16_e32 v1, v2
+; GFX8-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_exp_f16_e32 v1, v2
; GFX9-NEXT: v_exp_f16_e32 v0, v0
-; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
+; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_log_f16_e32 v2, v0
-; GFX10-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX10-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v1
; GFX10-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX10-NEXT: v_exp_f16_e32 v1, v1
+; GFX10-NEXT: v_exp_f16_e32 v1, v2
; GFX10-NEXT: v_exp_f16_e32 v0, v0
-; GFX10-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_v2f16_fneg_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: v_log_f16_e32 v0, v0
+; GFX11-NEXT: v_log_f16_e32 v2, v0
+; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_log_f16_e32 v2, v2
+; GFX11-NEXT: v_log_f16_e32 v0, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v0, v0, v1 :: v_dual_mul_dx9_zero_f32 v1, v2, v3
-; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v1, v2, v1 :: v_dual_mul_dx9_zero_f32 v0, v0, v3
; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX11-NEXT: v_exp_f16_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: v_exp_f16_e32 v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_exp_f16_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%y.fneg = fneg <2 x half> %y
%pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
; GFX8-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT: v_cvt_f32_f16_e32 v3, v1
+; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX8-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
+; GFX8-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v2
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_exp_f16_e32 v0, v0
-; GFX8-NEXT: v_exp_f16_e32 v1, v1
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_exp_f16_e32 v1, v2
+; GFX8-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT: v_exp_f16_e32 v1, v2
; GFX9-NEXT: v_exp_f16_e32 v0, v0
-; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
+; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v2, v3
; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
-; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX10-NEXT: v_exp_f16_e32 v1, v1
+; GFX10-NEXT: v_exp_f16_e32 v1, v2
; GFX10-NEXT: v_exp_f16_e32 v0, v0
-; GFX10-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX11-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-NEXT: v_log_f16_e32 v0, v0
+; GFX11-NEXT: v_log_f16_e32 v2, v0
+; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_log_f16_e32 v2, v2
+; GFX11-NEXT: v_log_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v0, v0, v1 :: v_dual_mul_dx9_zero_f32 v1, v2, v3
-; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v1, v2, v1 :: v_dual_mul_dx9_zero_f32 v0, v0, v3
; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GFX11-NEXT: v_exp_f16_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT: v_exp_f16_e32 v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_exp_f16_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
-; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%x.fneg = fneg <2 x half> %x
%y.fneg = fneg <2 x half> %y
; CI-NEXT: v_fma_f32 v3, -v3, v6, v4
; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; CI-NEXT: v_div_fmas_f32 v3, v3, v5, v6
-; CI-NEXT: v_bfe_u32 v0, v0, 0, 16
; CI-NEXT: v_div_fixup_f32 v3, v3, v2, v1
; CI-NEXT: v_trunc_f32_e32 v3, v3
; CI-NEXT: v_fma_f32 v1, -v3, v2, v1
; CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CI-NEXT: v_bfe_u32 v1, v1, 0, 16
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: v_or_b32_e32 v2, v0, v1
; CI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_div_fixup_f16 v1, v1, v2, s1
; VI-NEXT: v_trunc_f16_e32 v1, v1
; VI-NEXT: v_fma_f16 v1, -v1, v2, s1
-; VI-NEXT: v_mov_b32_e32 v2, 16
-; VI-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; VI-NEXT: v_or_b32_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; VI-NEXT: v_or_b32_e32 v2, v0, v1
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: flat_store_dword v[0:1], v2
; CI-NEXT: v_fma_f32 v5, -v5, v8, v6
; CI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; CI-NEXT: v_div_fmas_f32 v5, v5, v7, v8
-; CI-NEXT: v_bfe_u32 v1, v1, 0, 16
-; CI-NEXT: v_bfe_u32 v0, v0, 0, 16
; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT: v_or_b32_e32 v0, v0, v1
-; CI-NEXT: v_bfe_u32 v1, v2, 0, 16
; CI-NEXT: v_div_fixup_f32 v5, v5, v4, v3
; CI-NEXT: v_trunc_f32_e32 v5, v5
; CI-NEXT: v_fma_f32 v3, -v5, v4, v3
; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NEXT: v_bfe_u32 v2, v3, 0, 16
-; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; CI-NEXT: v_or_b32_e32 v1, v1, v2
+; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v3
+; CI-NEXT: v_or_b32_e32 v1, v2, v1
; CI-NEXT: v_mov_b32_e32 v2, s4
; CI-NEXT: v_mov_b32_e32 v3, s5
; CI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: v_trunc_f16_e32 v1, v1
; VI-NEXT: v_fma_f16 v1, -v1, v2, s6
; VI-NEXT: v_cvt_f32_f16_e32 v2, s3
+; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: v_mul_f32_e32 v2, v2, v4
; VI-NEXT: v_cvt_f16_f32_e32 v2, v2
; VI-NEXT: v_mov_b32_e32 v4, s9
; VI-NEXT: v_div_fixup_f16 v3, v3, v4, s7
; VI-NEXT: v_trunc_f16_e32 v3, v3
; VI-NEXT: v_fma_f16 v3, -v3, v4, s7
-; VI-NEXT: v_mov_b32_e32 v4, 16
-; VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v3
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
; VI-NEXT: v_mov_b32_e32 v2, s4
; VI-NEXT: v_mov_b32_e32 v3, s5
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX8-NEXT: s_and_b32 s2, s2, 0x7f
; GFX8-NEXT: s_and_b32 s1, s1, 0x7f
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX8-NEXT: s_lshr_b32 s1, s1, 1
; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX9-NEXT: s_and_b32 s2, s2, 0x7f
; GFX9-NEXT: s_and_b32 s1, s1, 0x7f
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX9-NEXT: s_lshr_b32 s1, s1, 1
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
; GFX10-NEXT: s_and_b32 s2, s2, 0x7f
; GFX10-NEXT: s_and_b32 s1, s1, 0x7f
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX10-NEXT: s_lshr_b32 s1, s1, 1
; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX11-NEXT: s_and_b32 s2, s2, 0x7f
; GFX11-NEXT: s_and_b32 s1, s1, 0x7f
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX11-NEXT: s_lshr_b32 s1, s1, 1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX8-LABEL: s_fshl_i8:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_and_b32 s3, s2, 7
; GFX8-NEXT: s_andn2_b32 s2, 7, s2
; GFX8-NEXT: s_lshr_b32 s1, s1, 1
; GFX9-LABEL: s_fshl_i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_and_b32 s3, s2, 7
; GFX9-NEXT: s_andn2_b32 s2, 7, s2
; GFX9-NEXT: s_lshr_b32 s1, s1, 1
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s1, s1, 0xff
; GFX10-NEXT: s_and_b32 s3, s2, 7
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: s_andn2_b32 s2, 7, s2
; GFX10-NEXT: s_lshr_b32 s1, s1, 1
; GFX10-NEXT: s_lshl_b32 s0, s0, s3
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s1, s1, 0xff
; GFX11-NEXT: s_and_b32 s3, s2, 7
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: s_and_not1_b32 s2, 7, s2
; GFX11-NEXT: s_lshr_b32 s1, s1, 1
; GFX11-NEXT: s_lshl_b32 s0, s0, s3
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v3, 7, v2
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v2, v2
; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 7
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v3, 7, v2
-; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX8-NEXT: v_not_b32_e32 v2, v2
; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0
; GFX8-NEXT: v_mov_b32_e32 v3, 1
; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v3, 7, v2
-; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX9-NEXT: v_not_b32_e32 v2, v2
; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0
; GFX9-NEXT: v_mov_b32_e32 v3, 1
; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v3, v2
; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
; GFX10-NEXT: v_and_b32_e32 v3, 7, v3
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v3, v2
; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX8-LABEL: s_fshl_i8_4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s0, s0, 4
; GFX8-NEXT: s_lshr_b32 s1, s1, 4
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX9-LABEL: s_fshl_i8_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_lshl_b32 s0, s0, 4
; GFX9-NEXT: s_lshr_b32 s1, s1, 4
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s1, s1, 0xff
; GFX10-NEXT: s_lshl_b32 s0, s0, 4
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: s_lshr_b32 s1, s1, 4
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s1, s1, 0xff
; GFX11-NEXT: s_lshl_b32 s0, s0, 4
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_lshr_b32 s1, s1, 4
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX8-LABEL: s_fshl_i8_5:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s0, s0, 5
; GFX8-NEXT: s_lshr_b32 s1, s1, 3
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX9-LABEL: s_fshl_i8_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_lshl_b32 s0, s0, 5
; GFX9-NEXT: s_lshr_b32 s1, s1, 3
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s1, s1, 0xff
; GFX10-NEXT: s_lshl_b32 s0, s0, 5
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: s_lshr_b32 s1, s1, 3
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s1, s1, 0xff
; GFX11-NEXT: s_lshl_b32 s0, s0, 5
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_lshr_b32 s1, s1, 3
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s4, s1, 8
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshr_b32 s5, s2, 8
; GFX8-NEXT: s_and_b32 s6, s2, 7
; GFX8-NEXT: s_andn2_b32 s2, 7, s2
; GFX8-NEXT: s_and_b32 s1, s5, 7
; GFX8-NEXT: s_lshl_b32 s1, s3, s1
; GFX8-NEXT: s_and_b32 s3, s4, 0xff
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_andn2_b32 s2, 7, s5
; GFX8-NEXT: s_lshr_b32 s3, s3, 1
; GFX8-NEXT: s_lshr_b32 s2, s3, s2
; GFX8-NEXT: s_or_b32 s1, s1, s2
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
-; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshr_b32 s4, s1, 8
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_lshr_b32 s5, s2, 8
; GFX9-NEXT: s_and_b32 s6, s2, 7
; GFX9-NEXT: s_andn2_b32 s2, 7, s2
; GFX9-NEXT: s_and_b32 s1, s5, 7
; GFX9-NEXT: s_lshl_b32 s1, s3, s1
; GFX9-NEXT: s_and_b32 s3, s4, 0xff
-; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
; GFX9-NEXT: s_andn2_b32 s2, 7, s5
; GFX9-NEXT: s_lshr_b32 s3, s3, 1
; GFX9-NEXT: s_lshr_b32 s2, s3, s2
; GFX9-NEXT: s_or_b32 s1, s1, s2
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
-; GFX9-NEXT: s_bfe_u32 s2, 8, 0x100000
; GFX9-NEXT: s_and_b32 s0, s0, 0xff
-; GFX9-NEXT: s_lshl_b32 s1, s1, s2
+; GFX9-NEXT: s_lshl_b32 s1, s1, 8
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-NEXT: s_and_b32 s4, s4, 0xff
; GFX10-NEXT: s_and_b32 s6, s2, 7
; GFX10-NEXT: s_and_b32 s1, s1, 0xff
-; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
; GFX10-NEXT: s_lshr_b32 s3, s0, 8
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: s_lshl_b32 s0, s0, s6
; GFX10-NEXT: s_and_b32 s6, s5, 7
; GFX10-NEXT: s_andn2_b32 s5, 7, s5
; GFX10-NEXT: s_or_b32 s2, s3, s4
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: s_and_b32 s1, s2, 0xff
-; GFX10-NEXT: s_bfe_u32 s2, 8, 0x100000
; GFX10-NEXT: s_and_b32 s0, s0, 0xff
-; GFX10-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10-NEXT: s_lshl_b32 s1, s1, 8
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-NEXT: s_and_b32 s4, s4, 0xff
; GFX11-NEXT: s_and_b32 s6, s2, 7
; GFX11-NEXT: s_and_b32 s1, s1, 0xff
-; GFX11-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
; GFX11-NEXT: s_lshr_b32 s3, s0, 8
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: s_lshl_b32 s0, s0, s6
; GFX11-NEXT: s_and_b32 s6, s5, 7
; GFX11-NEXT: s_and_not1_b32 s5, 7, s5
; GFX11-NEXT: s_or_b32 s2, s3, s4
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX11-NEXT: s_and_b32 s1, s2, 0xff
-; GFX11-NEXT: s_bfe_u32 s2, 8, 0x100000
; GFX11-NEXT: s_and_b32 s0, s0, 0xff
-; GFX11-NEXT: s_lshl_b32 s1, s1, s2
+; GFX11-NEXT: s_lshl_b32 s1, s1, 8
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX11-NEXT: ; return to shader part epilog
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v2
; GFX6-NEXT: v_and_b32_e32 v5, 7, v2
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v2, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v5, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v5
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_and_b32_e32 v2, 7, v4
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX6-NEXT: v_not_b32_e32 v4, v4
; GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8
; GFX6-NEXT: v_and_b32_e32 v4, 7, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX8-NEXT: v_and_b32_e32 v6, 7, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2
-; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX8-NEXT: v_not_b32_e32 v2, v2
; GFX8-NEXT: v_lshlrev_b16_e32 v0, v6, v0
; GFX8-NEXT: v_mov_b32_e32 v6, 1
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_and_b32_e32 v1, 7, v5
-; GFX8-NEXT: v_xor_b32_e32 v2, -1, v5
+; GFX8-NEXT: v_not_b32_e32 v2, v5
; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
; GFX8-NEXT: v_lshlrev_b16_e32 v1, v1, v3
; GFX8-NEXT: v_lshrrev_b16_sdwa v3, v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2
; GFX9-NEXT: v_and_b32_e32 v6, 7, v2
-; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX9-NEXT: v_not_b32_e32 v2, v2
; GFX9-NEXT: s_mov_b32 s4, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1
; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
; GFX9-NEXT: v_and_b32_e32 v1, 7, v5
-; GFX9-NEXT: v_xor_b32_e32 v2, -1, v5
+; GFX9-NEXT: v_not_b32_e32 v2, v5
; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
; GFX9-NEXT: v_lshlrev_b16_e32 v1, v1, v3
; GFX9-NEXT: v_lshrrev_b16_sdwa v3, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v0
-; GFX10-NEXT: v_xor_b32_e32 v7, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v7, v2
; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX10-NEXT: v_xor_b32_e32 v6, -1, v3
+; GFX10-NEXT: v_not_b32_e32 v6, v3
; GFX10-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX10-NEXT: v_and_b32_e32 v3, 7, v3
; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v0
-; GFX11-NEXT: v_xor_b32_e32 v7, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v7, v2
; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX11-NEXT: v_xor_b32_e32 v6, -1, v3
+; GFX11-NEXT: v_not_b32_e32 v6, v3
; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
; GFX11-NEXT: v_and_b32_e32 v3, 7, v3
; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
; GFX8-NEXT: s_lshr_b32 s8, s1, 24
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshr_b32 s9, s2, 8
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
; GFX8-NEXT: s_lshr_b32 s11, s2, 24
; GFX8-NEXT: s_and_b32 s1, s9, 7
; GFX8-NEXT: s_lshl_b32 s1, s3, s1
; GFX8-NEXT: s_and_b32 s3, s6, 0xff
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_andn2_b32 s2, 7, s9
; GFX8-NEXT: s_lshr_b32 s3, s3, 1
; GFX8-NEXT: s_lshr_b32 s2, s3, s2
; GFX8-NEXT: s_and_b32 s2, s10, 7
; GFX8-NEXT: s_lshl_b32 s2, s4, s2
; GFX8-NEXT: s_and_b32 s4, s7, 0xff
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_andn2_b32 s3, 7, s10
; GFX8-NEXT: s_lshr_b32 s4, s4, 1
; GFX8-NEXT: s_lshr_b32 s3, s4, s3
; GFX9-NEXT: s_lshr_b32 s7, s1, 16
; GFX9-NEXT: s_lshr_b32 s8, s1, 24
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_lshr_b32 s9, s2, 8
; GFX9-NEXT: s_lshr_b32 s10, s2, 16
; GFX9-NEXT: s_lshr_b32 s11, s2, 24
; GFX9-NEXT: s_and_b32 s1, s9, 7
; GFX9-NEXT: s_lshl_b32 s1, s3, s1
; GFX9-NEXT: s_and_b32 s3, s6, 0xff
-; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
; GFX9-NEXT: s_andn2_b32 s2, 7, s9
; GFX9-NEXT: s_lshr_b32 s3, s3, 1
; GFX9-NEXT: s_lshr_b32 s2, s3, s2
; GFX9-NEXT: s_and_b32 s2, s10, 7
; GFX9-NEXT: s_lshl_b32 s2, s4, s2
; GFX9-NEXT: s_and_b32 s4, s7, 0xff
-; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX9-NEXT: s_and_b32 s4, 0xffff, s4
; GFX9-NEXT: s_andn2_b32 s3, 7, s10
; GFX9-NEXT: s_lshr_b32 s4, s4, 1
; GFX9-NEXT: s_lshr_b32 s3, s4, s3
; GFX10-NEXT: s_lshr_b32 s8, s1, 24
; GFX10-NEXT: s_and_b32 s1, s1, 0xff
; GFX10-NEXT: s_lshr_b32 s9, s2, 8
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: s_lshr_b32 s10, s2, 16
; GFX10-NEXT: s_lshr_b32 s11, s2, 24
; GFX10-NEXT: s_and_b32 s12, s2, 7
; GFX10-NEXT: s_lshr_b32 s1, s1, s2
; GFX10-NEXT: s_and_b32 s2, s6, 0xff
; GFX10-NEXT: s_and_b32 s6, s9, 7
-; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
; GFX10-NEXT: s_andn2_b32 s9, 7, s9
; GFX10-NEXT: s_lshr_b32 s2, s2, 1
; GFX10-NEXT: s_lshr_b32 s4, s0, 16
; GFX10-NEXT: s_or_b32 s1, s3, s2
; GFX10-NEXT: s_and_b32 s2, s7, 0xff
; GFX10-NEXT: s_and_b32 s3, s10, 7
-; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
; GFX10-NEXT: s_andn2_b32 s6, 7, s10
; GFX10-NEXT: s_lshr_b32 s2, s2, 1
; GFX10-NEXT: s_lshl_b32 s3, s4, s3
; GFX11-NEXT: s_lshr_b32 s8, s1, 24
; GFX11-NEXT: s_and_b32 s1, s1, 0xff
; GFX11-NEXT: s_lshr_b32 s9, s2, 8
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: s_lshr_b32 s10, s2, 16
; GFX11-NEXT: s_lshr_b32 s11, s2, 24
; GFX11-NEXT: s_and_b32 s12, s2, 7
; GFX11-NEXT: s_lshr_b32 s1, s1, s2
; GFX11-NEXT: s_and_b32 s2, s6, 0xff
; GFX11-NEXT: s_and_b32 s6, s9, 7
-; GFX11-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
; GFX11-NEXT: s_and_not1_b32 s9, 7, s9
; GFX11-NEXT: s_lshr_b32 s2, s2, 1
; GFX11-NEXT: s_lshr_b32 s4, s0, 16
; GFX11-NEXT: s_or_b32 s1, s3, s2
; GFX11-NEXT: s_and_b32 s2, s7, 0xff
; GFX11-NEXT: s_and_b32 s3, s10, 7
-; GFX11-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
; GFX11-NEXT: s_and_not1_b32 s6, 7, s10
; GFX11-NEXT: s_lshr_b32 s2, s2, 1
; GFX11-NEXT: s_lshl_b32 s3, s4, s3
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v8, 24, v2
; GFX6-NEXT: v_and_b32_e32 v9, 7, v2
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v2, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v9
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_and_b32_e32 v2, 7, v6
-; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
+; GFX6-NEXT: v_not_b32_e32 v6, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v2, v2, v3
; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8
; GFX6-NEXT: v_and_b32_e32 v6, 7, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_and_b32_e32 v3, 7, v7
-; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7
+; GFX6-NEXT: v_not_b32_e32 v6, v7
; GFX6-NEXT: v_lshlrev_b32_e32 v3, v3, v4
; GFX6-NEXT: v_bfe_u32 v4, v1, 16, 8
; GFX6-NEXT: v_and_b32_e32 v6, 7, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 1, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4
-; GFX6-NEXT: v_xor_b32_e32 v6, -1, v8
+; GFX6-NEXT: v_not_b32_e32 v6, v8
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_and_b32_e32 v4, 7, v8
; GFX6-NEXT: v_and_b32_e32 v6, 7, v6
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v2
; GFX8-NEXT: v_and_b32_e32 v8, 7, v2
-; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX8-NEXT: v_not_b32_e32 v2, v2
; GFX8-NEXT: v_mov_b32_e32 v10, 1
; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
; GFX8-NEXT: v_lshrrev_b16_sdwa v11, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX8-NEXT: v_or_b32_e32 v2, v8, v2
; GFX8-NEXT: v_and_b32_e32 v8, 7, v5
-; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5
+; GFX8-NEXT: v_not_b32_e32 v5, v5
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT: v_and_b32_e32 v5, 7, v5
; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshrrev_b16_e32 v4, v5, v4
; GFX8-NEXT: v_or_b32_e32 v3, v3, v4
; GFX8-NEXT: v_and_b32_e32 v4, 7, v6
-; GFX8-NEXT: v_xor_b32_e32 v5, -1, v6
+; GFX8-NEXT: v_not_b32_e32 v5, v6
; GFX8-NEXT: v_and_b32_sdwa v6, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_and_b32_e32 v5, 7, v5
; GFX8-NEXT: v_lshrrev_b16_e32 v6, 1, v6
; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v6
; GFX8-NEXT: v_or_b32_e32 v4, v4, v5
; GFX8-NEXT: v_and_b32_e32 v5, 7, v7
-; GFX8-NEXT: v_xor_b32_e32 v6, -1, v7
+; GFX8-NEXT: v_not_b32_e32 v6, v7
; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
; GFX8-NEXT: v_mov_b32_e32 v5, 1
; GFX8-NEXT: v_and_b32_e32 v6, 7, v6
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2
; GFX9-NEXT: v_and_b32_e32 v8, 7, v2
-; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX9-NEXT: v_not_b32_e32 v2, v2
; GFX9-NEXT: s_mov_b32 s5, 1
; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
; GFX9-NEXT: v_lshrrev_b16_sdwa v10, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX9-NEXT: v_or_b32_e32 v2, v8, v2
; GFX9-NEXT: v_and_b32_e32 v8, 7, v5
-; GFX9-NEXT: v_xor_b32_e32 v5, -1, v5
+; GFX9-NEXT: v_not_b32_e32 v5, v5
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX9-NEXT: v_and_b32_e32 v5, 7, v5
; GFX9-NEXT: v_lshrrev_b16_sdwa v4, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_lshrrev_b16_e32 v4, v5, v4
; GFX9-NEXT: v_or_b32_e32 v3, v3, v4
; GFX9-NEXT: v_and_b32_e32 v4, 7, v6
-; GFX9-NEXT: v_xor_b32_e32 v5, -1, v6
+; GFX9-NEXT: v_not_b32_e32 v5, v6
; GFX9-NEXT: v_and_b32_sdwa v6, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_and_b32_e32 v5, 7, v5
; GFX9-NEXT: v_lshrrev_b16_e32 v6, 1, v6
; GFX9-NEXT: v_lshrrev_b16_e32 v5, v5, v6
; GFX9-NEXT: v_or_b32_e32 v4, v4, v5
; GFX9-NEXT: v_and_b32_e32 v5, 7, v7
-; GFX9-NEXT: v_xor_b32_e32 v6, -1, v7
+; GFX9-NEXT: v_not_b32_e32 v6, v7
; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
; GFX9-NEXT: v_mov_b32_e32 v5, 1
; GFX9-NEXT: v_and_b32_e32 v6, 7, v6
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1
-; GFX10-NEXT: v_xor_b32_e32 v9, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v9, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v2
; GFX10-NEXT: v_lshlrev_b16 v0, v10, v0
-; GFX10-NEXT: v_xor_b32_e32 v10, -1, v8
+; GFX10-NEXT: v_not_b32_e32 v10, v8
; GFX10-NEXT: v_and_b32_e32 v8, 7, v8
; GFX10-NEXT: v_mov_b32_e32 v13, 0xff
; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX10-NEXT: v_and_b32_e32 v12, 0xff, v1
; GFX10-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX10-NEXT: v_lshlrev_b16 v3, v8, v3
-; GFX10-NEXT: v_xor_b32_e32 v8, -1, v11
+; GFX10-NEXT: v_not_b32_e32 v8, v11
; GFX10-NEXT: v_and_b32_sdwa v1, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX10-NEXT: v_xor_b32_e32 v13, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v13, v2
; GFX10-NEXT: v_and_b32_e32 v10, 7, v10
; GFX10-NEXT: v_lshrrev_b16 v6, 1, v6
; GFX10-NEXT: v_and_b32_e32 v11, 7, v11
; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2
; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
-; GFX11-NEXT: v_xor_b32_e32 v13, -1, v9
+; GFX11-NEXT: v_not_b32_e32 v13, v9
; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2
; GFX11-NEXT: v_and_b32_e32 v9, 7, v9
; GFX11-NEXT: v_lshrrev_b32_e32 v8, 24, v1
; GFX11-NEXT: v_and_b32_e32 v13, 7, v13
; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7
; GFX11-NEXT: v_lshlrev_b16 v3, v9, v3
-; GFX11-NEXT: v_xor_b32_e32 v9, -1, v10
+; GFX11-NEXT: v_not_b32_e32 v9, v10
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-NEXT: v_lshrrev_b16 v6, v13, v6
-; GFX11-NEXT: v_xor_b32_e32 v13, -1, v11
+; GFX11-NEXT: v_not_b32_e32 v13, v11
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v0
; GFX11-NEXT: v_and_b32_e32 v12, 7, v2
-; GFX11-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v2, v2
; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11-NEXT: v_and_b32_e32 v10, 7, v10
; GFX11-NEXT: v_and_b32_e32 v9, 7, v9
; GFX6-NEXT: s_lshl_b32 s0, s0, 8
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
; GFX6-NEXT: s_or_b32 s0, s9, s0
-; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT: s_lshr_b32 s8, s1, 8
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
; GFX6-NEXT: s_and_b32 s6, s8, 0xff
; GFX6-NEXT: s_or_b32 s1, s7, s1
-; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX6-NEXT: s_or_b32 s1, s1, s6
; GFX6-NEXT: s_lshl_b32 s2, s2, 8
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
; GFX6-NEXT: s_or_b32 s2, s9, s2
-; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: s_lshr_b32 s8, s3, 8
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_and_b32 s3, s3, 0xff
; GFX6-NEXT: s_or_b32 s2, s2, s6
; GFX6-NEXT: s_and_b32 s6, s8, 0xff
; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX6-NEXT: s_or_b32 s3, s7, s3
-; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
-; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
+; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_or_b32 s3, s3, s6
; GFX6-NEXT: s_lshr_b32 s6, s4, 16
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
; GFX6-NEXT: s_or_b32 s4, s9, s4
-; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_or_b32 s4, s4, s6
; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0
; GFX6-NEXT: s_and_b32 s6, s8, 0xff
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX6-NEXT: s_or_b32 s5, s7, s5
-; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0
-; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX6-NEXT: s_or_b32 s5, s5, s6
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s6, s0, 8
; GFX8-NEXT: s_and_b32 s6, s6, 0xff
-; GFX8-NEXT: s_bfe_u32 s10, 8, 0x100000
; GFX8-NEXT: s_lshr_b32 s7, s0, 16
; GFX8-NEXT: s_lshr_b32 s8, s0, 24
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
-; GFX8-NEXT: s_lshl_b32 s6, s6, s10
+; GFX8-NEXT: s_lshl_b32 s6, s6, 8
; GFX8-NEXT: s_or_b32 s0, s0, s6
; GFX8-NEXT: s_and_b32 s6, s7, 0xff
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
-; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX8-NEXT: s_lshr_b32 s9, s1, 8
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s6, s6, 16
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
; GFX8-NEXT: s_or_b32 s0, s0, s6
-; GFX8-NEXT: s_lshl_b32 s1, s1, s10
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_and_b32 s6, s9, 0xff
; GFX8-NEXT: s_or_b32 s1, s8, s1
-; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s6, s6, 16
; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT: s_or_b32 s1, s1, s6
; GFX8-NEXT: s_lshr_b32 s7, s2, 16
; GFX8-NEXT: s_lshr_b32 s8, s2, 24
; GFX8-NEXT: s_and_b32 s2, s2, 0xff
-; GFX8-NEXT: s_lshl_b32 s6, s6, s10
+; GFX8-NEXT: s_lshl_b32 s6, s6, 8
; GFX8-NEXT: s_or_b32 s2, s2, s6
; GFX8-NEXT: s_and_b32 s6, s7, 0xff
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
-; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX8-NEXT: s_lshr_b32 s9, s3, 8
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshl_b32 s6, s6, 16
; GFX8-NEXT: s_and_b32 s3, s3, 0xff
; GFX8-NEXT: s_or_b32 s2, s2, s6
-; GFX8-NEXT: s_lshl_b32 s3, s3, s10
+; GFX8-NEXT: s_lshl_b32 s3, s3, 8
; GFX8-NEXT: s_and_b32 s6, s9, 0xff
; GFX8-NEXT: s_or_b32 s3, s8, s3
-; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s6, s6, 16
; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX8-NEXT: s_or_b32 s3, s3, s6
; GFX8-NEXT: s_lshr_b32 s7, s4, 16
; GFX8-NEXT: s_lshr_b32 s8, s4, 24
; GFX8-NEXT: s_and_b32 s4, s4, 0xff
-; GFX8-NEXT: s_lshl_b32 s6, s6, s10
+; GFX8-NEXT: s_lshl_b32 s6, s6, 8
; GFX8-NEXT: s_or_b32 s4, s4, s6
; GFX8-NEXT: s_and_b32 s6, s7, 0xff
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
-; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshl_b32 s6, s6, 16
; GFX8-NEXT: s_or_b32 s4, s4, s6
; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX8-NEXT: s_and_b32 s5, s5, 0xff
; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1
-; GFX8-NEXT: s_lshl_b32 s5, s5, s10
+; GFX8-NEXT: s_lshl_b32 s5, s5, 8
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0
; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX8-NEXT: s_and_b32 s6, s9, 0xff
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX8-NEXT: s_or_b32 s5, s8, s5
-; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0
-; GFX8-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
; GFX8-NEXT: s_lshl_b32 s6, s6, 16
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX8-NEXT: s_or_b32 s5, s5, s6
; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX9-NEXT: s_lshr_b32 s7, s0, 8
; GFX9-NEXT: s_and_b32 s7, s7, 0xff
-; GFX9-NEXT: s_bfe_u32 s12, 8, 0x100000
+; GFX9-NEXT: s_lshr_b32 s9, s0, 16
; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX9-NEXT: s_lshr_b32 s9, s0, 16
; GFX9-NEXT: s_lshr_b32 s10, s0, 24
; GFX9-NEXT: s_and_b32 s0, s0, 0xff
-; GFX9-NEXT: s_lshl_b32 s7, s7, s12
+; GFX9-NEXT: s_lshl_b32 s7, s7, 8
; GFX9-NEXT: s_or_b32 s0, s0, s7
; GFX9-NEXT: s_and_b32 s7, s9, 0xff
-; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
+; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX9-NEXT: s_lshr_b32 s11, s1, 8
-; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
; GFX9-NEXT: s_lshl_b32 s7, s7, 16
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
; GFX9-NEXT: v_mul_lo_u32 v2, v0, v1
; GFX9-NEXT: s_or_b32 s0, s0, s7
-; GFX9-NEXT: s_lshl_b32 s1, s1, s12
+; GFX9-NEXT: s_lshl_b32 s1, s1, 8
; GFX9-NEXT: s_and_b32 s7, s11, 0xff
; GFX9-NEXT: s_or_b32 s1, s10, s1
-; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_lshl_b32 s7, s7, 16
; GFX9-NEXT: s_or_b32 s1, s1, s7
; GFX9-NEXT: s_lshr_b32 s7, s2, 8
; GFX9-NEXT: s_lshr_b32 s9, s2, 16
; GFX9-NEXT: s_lshr_b32 s10, s2, 24
; GFX9-NEXT: s_and_b32 s2, s2, 0xff
-; GFX9-NEXT: s_lshl_b32 s7, s7, s12
+; GFX9-NEXT: s_lshl_b32 s7, s7, 8
; GFX9-NEXT: s_or_b32 s2, s2, s7
; GFX9-NEXT: s_and_b32 s7, s9, 0xff
-; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
+; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
; GFX9-NEXT: v_add_u32_e32 v0, v0, v2
; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
; GFX9-NEXT: s_lshr_b32 s11, s3, 8
-; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
; GFX9-NEXT: s_lshl_b32 s7, s7, 16
; GFX9-NEXT: s_and_b32 s3, s3, 0xff
; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2
; GFX9-NEXT: s_or_b32 s2, s2, s7
-; GFX9-NEXT: s_lshl_b32 s3, s3, s12
+; GFX9-NEXT: s_lshl_b32 s3, s3, 8
; GFX9-NEXT: s_and_b32 s7, s11, 0xff
; GFX9-NEXT: s_or_b32 s3, s10, s3
-; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
-; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
+; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
; GFX9-NEXT: s_lshl_b32 s7, s7, 16
; GFX9-NEXT: s_or_b32 s3, s3, s7
; GFX9-NEXT: s_lshr_b32 s7, s4, 8
; GFX9-NEXT: s_lshr_b32 s9, s4, 16
; GFX9-NEXT: s_lshr_b32 s10, s4, 24
; GFX9-NEXT: s_and_b32 s4, s4, 0xff
-; GFX9-NEXT: s_lshl_b32 s7, s7, s12
+; GFX9-NEXT: s_lshl_b32 s7, s7, 8
; GFX9-NEXT: s_or_b32 s4, s4, s7
; GFX9-NEXT: s_and_b32 s7, s9, 0xff
-; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
-; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
+; GFX9-NEXT: s_and_b32 s4, 0xffff, s4
; GFX9-NEXT: s_lshl_b32 s7, s7, 16
; GFX9-NEXT: v_mul_lo_u32 v1, v2, v1
; GFX9-NEXT: s_or_b32 s4, s4, s7
; GFX9-NEXT: s_lshr_b32 s11, s5, 8
; GFX9-NEXT: s_and_b32 s5, s5, 0xff
; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1
-; GFX9-NEXT: s_lshl_b32 s5, s5, s12
+; GFX9-NEXT: s_lshl_b32 s5, s5, 8
; GFX9-NEXT: s_and_b32 s7, s11, 0xff
; GFX9-NEXT: s_or_b32 s5, s10, s5
-; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
+; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
-; GFX9-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX9-NEXT: s_and_b32 s5, 0xffff, s5
; GFX9-NEXT: s_lshl_b32 s7, s7, 16
; GFX9-NEXT: s_or_b32 s5, s5, s7
; GFX9-NEXT: v_add_u32_e32 v1, v2, v1
; GFX10-NEXT: s_and_b32 s6, s6, 0xff
; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; GFX10-NEXT: s_bfe_u32 s10, 8, 0x100000
; GFX10-NEXT: s_lshr_b32 s8, s0, 24
; GFX10-NEXT: s_and_b32 s0, s0, 0xff
-; GFX10-NEXT: s_lshl_b32 s6, s6, s10
+; GFX10-NEXT: s_lshl_b32 s6, s6, 8
; GFX10-NEXT: s_and_b32 s7, s7, 0xff
; GFX10-NEXT: s_or_b32 s0, s0, s6
-; GFX10-NEXT: s_bfe_u32 s6, s7, 0x100000
+; GFX10-NEXT: s_and_b32 s6, 0xffff, s7
; GFX10-NEXT: s_lshr_b32 s7, s4, 8
+; GFX10-NEXT: s_lshr_b32 s10, s4, 16
; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
; GFX10-NEXT: s_and_b32 s7, s7, 0xff
-; GFX10-NEXT: s_lshr_b32 s11, s4, 16
-; GFX10-NEXT: s_lshr_b32 s12, s4, 24
+; GFX10-NEXT: s_lshr_b32 s11, s4, 24
+; GFX10-NEXT: s_and_b32 s4, s4, 0xff
; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX10-NEXT: s_and_b32 s4, s4, 0xff
-; GFX10-NEXT: s_lshl_b32 s7, s7, s10
-; GFX10-NEXT: s_lshr_b32 s13, s5, 8
+; GFX10-NEXT: s_lshl_b32 s7, s7, 8
+; GFX10-NEXT: s_lshr_b32 s12, s5, 8
+; GFX10-NEXT: s_or_b32 s4, s4, s7
; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0
; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1
-; GFX10-NEXT: s_or_b32 s4, s4, s7
-; GFX10-NEXT: s_and_b32 s7, s11, 0xff
-; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000
-; GFX10-NEXT: s_bfe_u32 s7, s7, 0x100000
+; GFX10-NEXT: s_and_b32 s7, s10, 0xff
+; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX10-NEXT: s_and_b32 s7, 0xffff, s7
; GFX10-NEXT: s_and_b32 s5, s5, 0xff
; GFX10-NEXT: s_lshl_b32 s7, s7, 16
+; GFX10-NEXT: s_lshl_b32 s5, s5, 8
; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3
-; GFX10-NEXT: s_lshl_b32 s5, s5, s10
; GFX10-NEXT: s_or_b32 s4, s4, s7
-; GFX10-NEXT: s_and_b32 s7, s13, 0xff
-; GFX10-NEXT: s_or_b32 s5, s12, s5
-; GFX10-NEXT: s_bfe_u32 s7, s7, 0x100000
-; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX10-NEXT: s_and_b32 s7, s12, 0xff
+; GFX10-NEXT: s_or_b32 s5, s11, s5
+; GFX10-NEXT: s_and_b32 s7, 0xffff, s7
+; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
+; GFX10-NEXT: s_lshl_b32 s7, s7, 16
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2
; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3
-; GFX10-NEXT: s_lshl_b32 s7, s7, 16
-; GFX10-NEXT: s_lshr_b32 s9, s1, 8
; GFX10-NEXT: s_or_b32 s5, s5, s7
+; GFX10-NEXT: s_lshr_b32 s9, s1, 8
+; GFX10-NEXT: s_and_b32 s1, s1, 0xff
; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0
; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1
-; GFX10-NEXT: s_and_b32 s1, s1, 0xff
+; GFX10-NEXT: s_lshl_b32 s1, s1, 8
; GFX10-NEXT: s_and_b32 s7, s9, 0xff
-; GFX10-NEXT: s_lshl_b32 s1, s1, s10
-; GFX10-NEXT: s_lshr_b32 s9, s2, 16
; GFX10-NEXT: s_or_b32 s1, s8, s1
; GFX10-NEXT: s_lshr_b32 s8, s2, 8
+; GFX10-NEXT: s_lshr_b32 s9, s2, 16
+; GFX10-NEXT: s_and_b32 s8, s8, 0xff
; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24
-; GFX10-NEXT: s_and_b32 s8, s8, 0xff
-; GFX10-NEXT: s_lshr_b32 s11, s2, 24
+; GFX10-NEXT: s_lshr_b32 s10, s2, 24
; GFX10-NEXT: s_and_b32 s2, s2, 0xff
-; GFX10-NEXT: s_lshl_b32 s8, s8, s10
-; GFX10-NEXT: s_bfe_u32 s7, s7, 0x100000
+; GFX10-NEXT: s_lshl_b32 s8, s8, 8
+; GFX10-NEXT: s_and_b32 s7, 0xffff, s7
; GFX10-NEXT: s_or_b32 s2, s2, s8
+; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0
; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1
; GFX10-NEXT: s_lshr_b32 s4, s3, 8
; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
-; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000
-; GFX10-NEXT: s_lshl_b32 s3, s3, s10
+; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
+; GFX10-NEXT: s_lshl_b32 s3, s3, 8
; GFX10-NEXT: s_and_b32 s4, s4, 0xff
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
-; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
; GFX10-NEXT: s_lshl_b32 s5, s5, 16
-; GFX10-NEXT: s_or_b32 s3, s11, s3
+; GFX10-NEXT: s_or_b32 s3, s10, s3
; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
-; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
; GFX10-NEXT: s_or_b32 s2, s2, s5
-; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v0
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0
-; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000
; GFX10-NEXT: s_lshl_b32 s6, s6, 16
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v1
; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
; GFX10-NEXT: s_lshl_b32 s7, s7, 16
+; GFX10-NEXT: s_or_b32 s0, s0, s6
; GFX10-NEXT: v_lshrrev_b32_e64 v2, v2, s2
; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GFX10-NEXT: s_lshr_b32 s2, s3, 1
-; GFX10-NEXT: s_or_b32 s0, s0, s6
; GFX10-NEXT: s_or_b32 s1, s1, s7
; GFX10-NEXT: v_lshl_or_b32 v0, s0, v0, v2
; GFX10-NEXT: v_lshrrev_b32_e64 v3, v3, s2
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, 24
; GFX11-NEXT: s_lshr_b32 s6, s0, 8
-; GFX11-NEXT: s_bfe_u32 s9, 8, 0x100000
+; GFX11-NEXT: s_lshr_b32 s7, s0, 16
; GFX11-NEXT: s_and_b32 s6, s6, 0xff
; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; GFX11-NEXT: s_lshr_b32 s7, s0, 16
; GFX11-NEXT: s_lshr_b32 s8, s0, 24
; GFX11-NEXT: s_and_b32 s0, s0, 0xff
-; GFX11-NEXT: s_lshl_b32 s6, s6, s9
-; GFX11-NEXT: s_lshr_b32 s11, s4, 24
+; GFX11-NEXT: s_lshl_b32 s6, s6, 8
+; GFX11-NEXT: s_lshr_b32 s10, s4, 24
; GFX11-NEXT: s_or_b32 s0, s0, s6
; GFX11-NEXT: s_and_b32 s6, s7, 0xff
-; GFX11-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX11-NEXT: s_and_b32 s6, 0xffff, s6
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_dual_mul_f32 v0, 0x4f7ffffe, v0 :: v_dual_mul_f32 v1, 0x4f7ffffe, v1
-; GFX11-NEXT: s_bfe_u32 s6, s6, 0x100000
-; GFX11-NEXT: s_lshr_b32 s7, s4, 16
; GFX11-NEXT: s_lshl_b32 s6, s6, 16
+; GFX11-NEXT: s_lshr_b32 s7, s4, 16
+; GFX11-NEXT: s_or_b32 s0, s0, s6
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX11-NEXT: s_or_b32 s0, s0, s6
; GFX11-NEXT: s_lshr_b32 s6, s4, 8
; GFX11-NEXT: s_and_b32 s4, s4, 0xff
+; GFX11-NEXT: s_and_b32 s6, s6, 0xff
; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0
; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1
-; GFX11-NEXT: s_and_b32 s6, s6, 0xff
+; GFX11-NEXT: s_lshl_b32 s6, s6, 8
; GFX11-NEXT: s_and_b32 s7, s7, 0xff
-; GFX11-NEXT: s_lshl_b32 s6, s6, s9
-; GFX11-NEXT: s_lshr_b32 s12, s5, 8
; GFX11-NEXT: s_or_b32 s4, s4, s6
-; GFX11-NEXT: s_bfe_u32 s6, s7, 0x100000
+; GFX11-NEXT: s_and_b32 s6, 0xffff, s7
+; GFX11-NEXT: s_lshr_b32 s11, s5, 8
+; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
; GFX11-NEXT: v_mul_hi_u32 v2, v0, v2
-; GFX11-NEXT: s_bfe_u32 s4, s4, 0x100000
; GFX11-NEXT: s_lshl_b32 s6, s6, 16
; GFX11-NEXT: s_and_b32 s5, s5, 0xff
; GFX11-NEXT: s_or_b32 s4, s4, s6
-; GFX11-NEXT: s_lshl_b32 s5, s5, s9
-; GFX11-NEXT: s_and_b32 s6, s12, 0xff
-; GFX11-NEXT: s_or_b32 s5, s11, s5
+; GFX11-NEXT: s_lshl_b32 s5, s5, 8
+; GFX11-NEXT: s_and_b32 s6, s11, 0xff
+; GFX11-NEXT: s_or_b32 s5, s10, s5
+; GFX11-NEXT: s_and_b32 s6, 0xffff, s6
; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2
; GFX11-NEXT: v_mul_hi_u32 v2, v1, v3
-; GFX11-NEXT: s_bfe_u32 s6, s6, 0x100000
-; GFX11-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX11-NEXT: s_and_b32 s5, 0xffff, s5
; GFX11-NEXT: s_lshl_b32 s6, s6, 16
+; GFX11-NEXT: s_lshr_b32 s9, s1, 8
; GFX11-NEXT: v_mul_hi_u32 v0, s4, v0
; GFX11-NEXT: s_or_b32 s5, s5, s6
-; GFX11-NEXT: s_lshr_b32 s10, s1, 8
; GFX11-NEXT: s_and_b32 s1, s1, 0xff
+; GFX11-NEXT: s_and_b32 s7, s9, 0xff
; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2
-; GFX11-NEXT: s_and_b32 s7, s10, 0xff
-; GFX11-NEXT: s_lshl_b32 s1, s1, s9
-; GFX11-NEXT: s_bfe_u32 s6, s7, 0x100000
+; GFX11-NEXT: s_lshl_b32 s1, s1, 8
+; GFX11-NEXT: s_and_b32 s6, 0xffff, s7
+; GFX11-NEXT: s_lshr_b32 s7, s2, 8
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX11-NEXT: v_mul_hi_u32 v1, s5, v1
-; GFX11-NEXT: s_lshr_b32 s7, s2, 8
; GFX11-NEXT: s_or_b32 s1, s8, s1
; GFX11-NEXT: s_lshr_b32 s8, s2, 16
; GFX11-NEXT: s_and_b32 s7, s7, 0xff
-; GFX11-NEXT: s_lshr_b32 s10, s3, 8
-; GFX11-NEXT: s_lshl_b32 s7, s7, s9
+; GFX11-NEXT: s_lshr_b32 s9, s3, 8
+; GFX11-NEXT: s_lshl_b32 s7, s7, 8
+; GFX11-NEXT: s_and_b32 s3, s3, 0xff
; GFX11-NEXT: v_sub_nc_u32_e32 v0, s4, v0
; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24
; GFX11-NEXT: s_lshr_b32 s4, s2, 24
; GFX11-NEXT: s_and_b32 s2, s2, 0xff
-; GFX11-NEXT: s_and_b32 s3, s3, 0xff
+; GFX11-NEXT: s_lshl_b32 s3, s3, 8
; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
; GFX11-NEXT: s_or_b32 s2, s2, s7
-; GFX11-NEXT: s_lshl_b32 s3, s3, s9
+; GFX11-NEXT: s_or_b32 s3, s4, s3
; GFX11-NEXT: v_sub_nc_u32_e32 v1, s5, v1
; GFX11-NEXT: s_and_b32 s5, s8, 0xff
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX11-NEXT: s_bfe_u32 s5, s5, 0x100000
-; GFX11-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX11-NEXT: s_and_b32 s5, 0xffff, s5
+; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
; GFX11-NEXT: s_lshl_b32 s5, s5, 16
-; GFX11-NEXT: s_or_b32 s3, s4, s3
+; GFX11-NEXT: s_and_b32 s4, s9, 0xff
; GFX11-NEXT: s_or_b32 s2, s2, s5
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
-; GFX11-NEXT: s_and_b32 s4, s10, 0xff
; GFX11-NEXT: s_lshr_b32 s2, s2, 1
-; GFX11-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
; GFX11-NEXT: s_lshl_b32 s6, s6, 16
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0
; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1
; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX11-NEXT: v_lshrrev_b32_e64 v2, v2, s2
-; GFX11-NEXT: s_bfe_u32 s2, s3, 0x100000
+; GFX11-NEXT: s_and_b32 s2, 0xffff, s3
; GFX11-NEXT: s_lshl_b32 s3, s4, 16
; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GFX11-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_alignbit_b32 v1, v0, v1, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v2, v2
; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_alignbit_b32 v1, v0, v1, 1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX8-NEXT: v_not_b32_e32 v2, v2
; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v1, v0, v1, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX9-NEXT: v_not_b32_e32 v2, v2
; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v1, v0, v1, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v2, v2
; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v1, v0, v1, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX11-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v2, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX6-NEXT: v_mov_b32_e32 v1, s1
; GFX6-NEXT: v_alignbit_b32 v1, s0, v1, 1
; GFX6-NEXT: s_lshr_b32 s0, s0, 1
-; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v0, v0
; GFX6-NEXT: v_alignbit_b32 v0, s0, v1, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_alignbit_b32 v1, s0, v1, 1
; GFX8-NEXT: s_lshr_b32 s0, s0, 1
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: v_alignbit_b32 v0, s0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_alignbit_b32 v1, s0, v1, 1
; GFX9-NEXT: s_lshr_b32 s0, s0, 1
-; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v0, v0
; GFX9-NEXT: v_alignbit_b32 v0, s0, v1, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_fshl_i32_ssv:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_alignbit_b32 v1, s0, s1, 1
-; GFX10-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX10-NEXT: v_not_b32_e32 v0, v0
; GFX10-NEXT: s_lshr_b32 s0, s0, 1
; GFX10-NEXT: v_alignbit_b32 v0, s0, v1, v0
; GFX10-NEXT: ; return to shader part epilog
; GFX11-LABEL: v_fshl_i32_ssv:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_alignbit_b32 v1, s0, s1, 1
-; GFX11-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX11-NEXT: v_not_b32_e32 v0, v0
; GFX11-NEXT: s_lshr_b32 s0, s0, 1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_alignbit_b32 v0, s0, v1, v0
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_alignbit_b32 v2, v0, v2, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX6-NEXT: v_not_b32_e32 v4, v4
; GFX6-NEXT: v_alignbit_b32 v0, v0, v2, v4
; GFX6-NEXT: v_alignbit_b32 v2, v1, v3, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX6-NEXT: v_xor_b32_e32 v3, -1, v5
+; GFX6-NEXT: v_not_b32_e32 v3, v5
; GFX6-NEXT: v_alignbit_b32 v1, v1, v2, v3
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_alignbit_b32 v2, v0, v2, 1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX8-NEXT: v_not_b32_e32 v4, v4
; GFX8-NEXT: v_alignbit_b32 v0, v0, v2, v4
; GFX8-NEXT: v_alignbit_b32 v2, v1, v3, 1
; GFX8-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX8-NEXT: v_xor_b32_e32 v3, -1, v5
+; GFX8-NEXT: v_not_b32_e32 v3, v5
; GFX8-NEXT: v_alignbit_b32 v1, v1, v2, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v2, v0, v2, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX9-NEXT: v_not_b32_e32 v4, v4
; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4
; GFX9-NEXT: v_alignbit_b32 v2, v1, v3, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX9-NEXT: v_xor_b32_e32 v3, -1, v5
+; GFX9-NEXT: v_not_b32_e32 v3, v5
; GFX9-NEXT: v_alignbit_b32 v1, v1, v2, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v2, v0, v2, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX10-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX10-NEXT: v_not_b32_e32 v4, v4
; GFX10-NEXT: v_alignbit_b32 v3, v1, v3, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX10-NEXT: v_xor_b32_e32 v5, -1, v5
+; GFX10-NEXT: v_not_b32_e32 v5, v5
; GFX10-NEXT: v_alignbit_b32 v0, v0, v2, v4
; GFX10-NEXT: v_alignbit_b32 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v2, v0, v2, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX11-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX11-NEXT: v_not_b32_e32 v4, v4
; GFX11-NEXT: v_alignbit_b32 v3, v1, v3, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX11-NEXT: v_xor_b32_e32 v5, -1, v5
+; GFX11-NEXT: v_not_b32_e32 v5, v5
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_alignbit_b32 v0, v0, v2, v4
; GFX11-NEXT: v_alignbit_b32 v1, v1, v3, v5
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_alignbit_b32 v3, v0, v3, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
+; GFX6-NEXT: v_not_b32_e32 v6, v6
; GFX6-NEXT: v_alignbit_b32 v0, v0, v3, v6
; GFX6-NEXT: v_alignbit_b32 v3, v1, v4, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v7
+; GFX6-NEXT: v_not_b32_e32 v4, v7
; GFX6-NEXT: v_alignbit_b32 v1, v1, v3, v4
; GFX6-NEXT: v_alignbit_b32 v3, v2, v5, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v2
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v8
+; GFX6-NEXT: v_not_b32_e32 v4, v8
; GFX6-NEXT: v_alignbit_b32 v2, v2, v3, v4
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_alignbit_b32 v3, v0, v3, 1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX8-NEXT: v_xor_b32_e32 v6, -1, v6
+; GFX8-NEXT: v_not_b32_e32 v6, v6
; GFX8-NEXT: v_alignbit_b32 v0, v0, v3, v6
; GFX8-NEXT: v_alignbit_b32 v3, v1, v4, 1
; GFX8-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX8-NEXT: v_xor_b32_e32 v4, -1, v7
+; GFX8-NEXT: v_not_b32_e32 v4, v7
; GFX8-NEXT: v_alignbit_b32 v1, v1, v3, v4
; GFX8-NEXT: v_alignbit_b32 v3, v2, v5, 1
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v2
-; GFX8-NEXT: v_xor_b32_e32 v4, -1, v8
+; GFX8-NEXT: v_not_b32_e32 v4, v8
; GFX8-NEXT: v_alignbit_b32 v2, v2, v3, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v3, v0, v3, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_xor_b32_e32 v6, -1, v6
+; GFX9-NEXT: v_not_b32_e32 v6, v6
; GFX9-NEXT: v_alignbit_b32 v0, v0, v3, v6
; GFX9-NEXT: v_alignbit_b32 v3, v1, v4, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX9-NEXT: v_xor_b32_e32 v4, -1, v7
+; GFX9-NEXT: v_not_b32_e32 v4, v7
; GFX9-NEXT: v_alignbit_b32 v1, v1, v3, v4
; GFX9-NEXT: v_alignbit_b32 v3, v2, v5, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v2
-; GFX9-NEXT: v_xor_b32_e32 v4, -1, v8
+; GFX9-NEXT: v_not_b32_e32 v4, v8
; GFX9-NEXT: v_alignbit_b32 v2, v2, v3, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v3, v0, v3, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX10-NEXT: v_xor_b32_e32 v6, -1, v6
+; GFX10-NEXT: v_not_b32_e32 v6, v6
; GFX10-NEXT: v_alignbit_b32 v4, v1, v4, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX10-NEXT: v_xor_b32_e32 v7, -1, v7
+; GFX10-NEXT: v_not_b32_e32 v7, v7
; GFX10-NEXT: v_alignbit_b32 v5, v2, v5, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 1, v2
-; GFX10-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX10-NEXT: v_not_b32_e32 v8, v8
; GFX10-NEXT: v_alignbit_b32 v0, v0, v3, v6
; GFX10-NEXT: v_alignbit_b32 v1, v1, v4, v7
; GFX10-NEXT: v_alignbit_b32 v2, v2, v5, v8
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v3, v0, v3, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX11-NEXT: v_xor_b32_e32 v6, -1, v6
+; GFX11-NEXT: v_not_b32_e32 v6, v6
; GFX11-NEXT: v_alignbit_b32 v4, v1, v4, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX11-NEXT: v_xor_b32_e32 v7, -1, v7
+; GFX11-NEXT: v_not_b32_e32 v7, v7
; GFX11-NEXT: v_alignbit_b32 v5, v2, v5, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 1, v2
-; GFX11-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX11-NEXT: v_not_b32_e32 v8, v8
; GFX11-NEXT: v_alignbit_b32 v0, v0, v3, v6
; GFX11-NEXT: v_alignbit_b32 v1, v1, v4, v7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_alignbit_b32 v4, v0, v4, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX6-NEXT: v_not_b32_e32 v8, v8
; GFX6-NEXT: v_alignbit_b32 v0, v0, v4, v8
; GFX6-NEXT: v_alignbit_b32 v4, v1, v5, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v9
+; GFX6-NEXT: v_not_b32_e32 v5, v9
; GFX6-NEXT: v_alignbit_b32 v1, v1, v4, v5
; GFX6-NEXT: v_alignbit_b32 v4, v2, v6, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v2
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v10
+; GFX6-NEXT: v_not_b32_e32 v5, v10
; GFX6-NEXT: v_alignbit_b32 v2, v2, v4, v5
; GFX6-NEXT: v_alignbit_b32 v4, v3, v7, 1
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v11
+; GFX6-NEXT: v_not_b32_e32 v5, v11
; GFX6-NEXT: v_alignbit_b32 v3, v3, v4, v5
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_alignbit_b32 v4, v0, v4, 1
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX8-NEXT: v_not_b32_e32 v8, v8
; GFX8-NEXT: v_alignbit_b32 v0, v0, v4, v8
; GFX8-NEXT: v_alignbit_b32 v4, v1, v5, 1
; GFX8-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX8-NEXT: v_xor_b32_e32 v5, -1, v9
+; GFX8-NEXT: v_not_b32_e32 v5, v9
; GFX8-NEXT: v_alignbit_b32 v1, v1, v4, v5
; GFX8-NEXT: v_alignbit_b32 v4, v2, v6, 1
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v2
-; GFX8-NEXT: v_xor_b32_e32 v5, -1, v10
+; GFX8-NEXT: v_not_b32_e32 v5, v10
; GFX8-NEXT: v_alignbit_b32 v2, v2, v4, v5
; GFX8-NEXT: v_alignbit_b32 v4, v3, v7, 1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 1, v3
-; GFX8-NEXT: v_xor_b32_e32 v5, -1, v11
+; GFX8-NEXT: v_not_b32_e32 v5, v11
; GFX8-NEXT: v_alignbit_b32 v3, v3, v4, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_alignbit_b32 v4, v0, v4, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX9-NEXT: v_not_b32_e32 v8, v8
; GFX9-NEXT: v_alignbit_b32 v0, v0, v4, v8
; GFX9-NEXT: v_alignbit_b32 v4, v1, v5, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX9-NEXT: v_xor_b32_e32 v5, -1, v9
+; GFX9-NEXT: v_not_b32_e32 v5, v9
; GFX9-NEXT: v_alignbit_b32 v1, v1, v4, v5
; GFX9-NEXT: v_alignbit_b32 v4, v2, v6, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v2
-; GFX9-NEXT: v_xor_b32_e32 v5, -1, v10
+; GFX9-NEXT: v_not_b32_e32 v5, v10
; GFX9-NEXT: v_alignbit_b32 v2, v2, v4, v5
; GFX9-NEXT: v_alignbit_b32 v4, v3, v7, 1
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v3
-; GFX9-NEXT: v_xor_b32_e32 v5, -1, v11
+; GFX9-NEXT: v_not_b32_e32 v5, v11
; GFX9-NEXT: v_alignbit_b32 v3, v3, v4, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_alignbit_b32 v4, v0, v4, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX10-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX10-NEXT: v_not_b32_e32 v8, v8
; GFX10-NEXT: v_alignbit_b32 v5, v1, v5, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX10-NEXT: v_xor_b32_e32 v9, -1, v9
+; GFX10-NEXT: v_not_b32_e32 v9, v9
; GFX10-NEXT: v_alignbit_b32 v6, v2, v6, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 1, v2
-; GFX10-NEXT: v_xor_b32_e32 v10, -1, v10
+; GFX10-NEXT: v_not_b32_e32 v10, v10
; GFX10-NEXT: v_alignbit_b32 v7, v3, v7, 1
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 1, v3
-; GFX10-NEXT: v_xor_b32_e32 v11, -1, v11
+; GFX10-NEXT: v_not_b32_e32 v11, v11
; GFX10-NEXT: v_alignbit_b32 v0, v0, v4, v8
; GFX10-NEXT: v_alignbit_b32 v1, v1, v5, v9
; GFX10-NEXT: v_alignbit_b32 v2, v2, v6, v10
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_alignbit_b32 v4, v0, v4, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
-; GFX11-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX11-NEXT: v_not_b32_e32 v8, v8
; GFX11-NEXT: v_alignbit_b32 v5, v1, v5, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 1, v1
-; GFX11-NEXT: v_xor_b32_e32 v9, -1, v9
+; GFX11-NEXT: v_not_b32_e32 v9, v9
; GFX11-NEXT: v_alignbit_b32 v6, v2, v6, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 1, v2
-; GFX11-NEXT: v_xor_b32_e32 v10, -1, v10
+; GFX11-NEXT: v_not_b32_e32 v10, v10
; GFX11-NEXT: v_alignbit_b32 v7, v3, v7, 1
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 1, v3
-; GFX11-NEXT: v_xor_b32_e32 v11, -1, v11
+; GFX11-NEXT: v_not_b32_e32 v11, v11
; GFX11-NEXT: v_alignbit_b32 v0, v0, v4, v8
; GFX11-NEXT: v_alignbit_b32 v1, v1, v5, v9
; GFX11-NEXT: v_alignbit_b32 v2, v2, v6, v10
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s3, s2, 15
; GFX6-NEXT: s_andn2_b32 s2, 15, s2
-; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
; GFX6-NEXT: s_bfe_u32 s1, s1, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: s_lshl_b32 s0, s0, s3
; GFX6-NEXT: s_lshr_b32 s1, s1, s2
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX8-LABEL: s_fshl_i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s3, s2, 15
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
; GFX8-NEXT: s_andn2_b32 s2, 15, s2
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
+; GFX8-NEXT: s_lshr_b32 s1, s1, 1
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshl_b32 s0, s0, s3
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000
-; GFX8-NEXT: s_lshr_b32 s1, s1, s3
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
; GFX8-NEXT: s_lshr_b32 s1, s1, s2
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
; GFX9-LABEL: s_fshl_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s3, s2, 15
-; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000
; GFX9-NEXT: s_andn2_b32 s2, 15, s2
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
+; GFX9-NEXT: s_lshr_b32 s1, s1, 1
+; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
; GFX9-NEXT: s_lshl_b32 s0, s0, s3
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX9-NEXT: s_bfe_u32 s3, 1, 0x100000
-; GFX9-NEXT: s_lshr_b32 s1, s1, s3
-; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000
; GFX9-NEXT: s_lshr_b32 s1, s1, s2
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s3, s2, 15
; GFX10-NEXT: s_andn2_b32 s2, 15, s2
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX10-NEXT: s_bfe_u32 s4, 1, 0x100000
-; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000
-; GFX10-NEXT: s_lshr_b32 s1, s1, s4
-; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
+; GFX10-NEXT: s_lshr_b32 s1, s1, 1
+; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
; GFX10-NEXT: s_lshl_b32 s0, s0, s3
; GFX10-NEXT: s_lshr_b32 s1, s1, s2
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s3, s2, 15
; GFX11-NEXT: s_and_not1_b32 s2, 15, s2
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX11-NEXT: s_bfe_u32 s4, 1, 0x100000
-; GFX11-NEXT: s_bfe_u32 s3, s3, 0x100000
-; GFX11-NEXT: s_lshr_b32 s1, s1, s4
-; GFX11-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX11-NEXT: s_and_b32 s3, 0xffff, s3
+; GFX11-NEXT: s_lshr_b32 s1, s1, 1
+; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
; GFX11-NEXT: s_lshl_b32 s0, s0, s3
; GFX11-NEXT: s_lshr_b32 s1, s1, s2
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
;
; GFX8-LABEL: s_fshl_i16_4:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s2, 4, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s2, 12, 0x100000
-; GFX8-NEXT: s_lshr_b32 s1, s1, s2
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_lshl_b32 s0, s0, 4
+; GFX8-NEXT: s_lshr_b32 s1, s1, 12
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_fshl_i16_4:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s2, 4, 0x100000
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX9-NEXT: s_bfe_u32 s2, 12, 0x100000
-; GFX9-NEXT: s_lshr_b32 s1, s1, s2
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX9-NEXT: s_lshl_b32 s0, s0, 4
+; GFX9-NEXT: s_lshr_b32 s1, s1, 12
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_fshl_i16_4:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_bfe_u32 s2, 4, 0x100000
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX10-NEXT: s_bfe_u32 s3, 12, 0x100000
-; GFX10-NEXT: s_lshl_b32 s0, s0, s2
-; GFX10-NEXT: s_lshr_b32 s1, s1, s3
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX10-NEXT: s_lshl_b32 s0, s0, 4
+; GFX10-NEXT: s_lshr_b32 s1, s1, 12
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_fshl_i16_4:
; GFX11: ; %bb.0:
-; GFX11-NEXT: s_bfe_u32 s2, 4, 0x100000
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX11-NEXT: s_bfe_u32 s3, 12, 0x100000
-; GFX11-NEXT: s_lshl_b32 s0, s0, s2
-; GFX11-NEXT: s_lshr_b32 s1, s1, s3
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX11-NEXT: s_lshl_b32 s0, s0, 4
+; GFX11-NEXT: s_lshr_b32 s1, s1, 12
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX11-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshl_i16_5:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s2, 5, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s2, 11, 0x100000
-; GFX8-NEXT: s_lshr_b32 s1, s1, s2
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_lshl_b32 s0, s0, 5
+; GFX8-NEXT: s_lshr_b32 s1, s1, 11
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_fshl_i16_5:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s2, 5, 0x100000
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX9-NEXT: s_bfe_u32 s2, 11, 0x100000
-; GFX9-NEXT: s_lshr_b32 s1, s1, s2
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX9-NEXT: s_lshl_b32 s0, s0, 5
+; GFX9-NEXT: s_lshr_b32 s1, s1, 11
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_fshl_i16_5:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_bfe_u32 s2, 5, 0x100000
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX10-NEXT: s_bfe_u32 s3, 11, 0x100000
-; GFX10-NEXT: s_lshl_b32 s0, s0, s2
-; GFX10-NEXT: s_lshr_b32 s1, s1, s3
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX10-NEXT: s_lshl_b32 s0, s0, 5
+; GFX10-NEXT: s_lshr_b32 s1, s1, 11
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_fshl_i16_5:
; GFX11: ; %bb.0:
-; GFX11-NEXT: s_bfe_u32 s2, 5, 0x100000
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX11-NEXT: s_bfe_u32 s3, 11, 0x100000
-; GFX11-NEXT: s_lshl_b32 s0, s0, s2
-; GFX11-NEXT: s_lshr_b32 s1, s1, s3
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX11-NEXT: s_lshl_b32 s0, s0, 5
+; GFX11-NEXT: s_lshr_b32 s1, s1, 11
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX11-NEXT: ; return to shader part epilog
; GFX6-NEXT: v_and_b32_e32 v3, 15, v2
; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
; GFX6-NEXT: v_and_b32_e32 v2, 15, v2
-; GFX6-NEXT: v_bfe_u32 v3, v3, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15
-; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, 15, v0
; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001
-; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
; GFX6-NEXT: ; return to shader part epilog
; GFX8-NEXT: v_and_b32_e32 v1, 15, v0
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0
-; GFX8-NEXT: s_bfe_u32 s0, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s1, 1, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s1
; GFX8-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX8-NEXT: s_lshr_b32 s0, s0, s1
+; GFX8-NEXT: s_lshr_b32 s0, s0, 1
; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
; GFX9-NEXT: v_and_b32_e32 v1, 15, v0
; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX9-NEXT: v_lshlrev_b16_e64 v1, v1, s0
-; GFX9-NEXT: s_bfe_u32 s0, s1, 0x100000
-; GFX9-NEXT: s_bfe_u32 s1, 1, 0x100000
+; GFX9-NEXT: s_and_b32 s0, 0xffff, s1
; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX9-NEXT: s_lshr_b32 s0, s0, s1
+; GFX9-NEXT: s_lshr_b32 s0, s0, 1
; GFX9-NEXT: v_lshrrev_b16_e64 v0, v0, s0
; GFX9-NEXT: v_or_b32_e32 v0, v1, v0
; GFX9-NEXT: ; return to shader part epilog
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
; GFX10-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000
-; GFX10-NEXT: s_lshr_b32 s1, s1, s2
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX10-NEXT: s_lshr_b32 s1, s1, 1
; GFX10-NEXT: v_and_b32_e32 v1, 15, v1
; GFX10-NEXT: v_lshlrev_b16 v0, v0, s0
; GFX10-NEXT: v_lshrrev_b16 v1, v1, s1
; GFX11: ; %bb.0:
; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0
; GFX11-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX11-NEXT: s_bfe_u32 s2, 1, 0x100000
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT: s_lshr_b32 s1, s1, s2
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: s_lshr_b32 s1, s1, 1
; GFX11-NEXT: v_and_b32_e32 v1, 15, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_lshlrev_b16 v0, v0, s0
; GFX11-NEXT: v_lshrrev_b16 v1, v1, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s2, s1, 15
; GFX6-NEXT: s_andn2_b32 s1, 15, s1
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
; GFX6-NEXT: s_lshl_b32 s0, s0, s2
; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s2, s1, 15
; GFX8-NEXT: s_andn2_b32 s1, 15, s1
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: v_lshrrev_b16_e32 v0, 1, v0
; GFX8-NEXT: s_lshl_b32 s0, s0, s2
; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s2, s1, 15
; GFX9-NEXT: s_andn2_b32 s1, 15, s1
-; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
; GFX9-NEXT: v_lshrrev_b16_e32 v0, 1, v0
; GFX9-NEXT: s_lshl_b32 s0, s0, s2
; GFX9-NEXT: v_lshrrev_b16_e32 v0, s1, v0
; GFX10-NEXT: v_lshrrev_b16 v0, 1, v0
; GFX10-NEXT: s_andn2_b32 s2, 15, s1
; GFX10-NEXT: s_and_b32 s1, s1, 15
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0
; GFX10-NEXT: s_lshl_b32 s0, s0, s1
; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
; GFX11-NEXT: s_and_not1_b32 s2, 15, s1
; GFX11-NEXT: s_and_b32 s1, s1, 15
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: v_lshrrev_b16 v0, s2, v0
; GFX11-NEXT: s_lshl_b32 s0, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s2, s1, 15
; GFX6-NEXT: s_andn2_b32 s1, 15, s1
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
; GFX6-NEXT: v_lshlrev_b32_e32 v0, s2, v0
; GFX6-NEXT: s_lshr_b32 s0, s0, s1
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s2, s1, 15
; GFX8-NEXT: s_andn2_b32 s1, 15, s1
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX8-NEXT: s_lshr_b32 s0, s0, 1
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: v_lshlrev_b16_e32 v0, s2, v0
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX8-NEXT: s_bfe_u32 s2, 1, 0x100000
-; GFX8-NEXT: s_lshr_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
; GFX8-NEXT: s_lshr_b32 s0, s0, s1
; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
; GFX8-NEXT: ; return to shader part epilog
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s2, s1, 15
; GFX9-NEXT: s_andn2_b32 s1, 15, s1
+; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX9-NEXT: s_lshr_b32 s0, s0, 1
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: v_lshlrev_b16_e32 v0, s2, v0
-; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX9-NEXT: s_bfe_u32 s2, 1, 0x100000
-; GFX9-NEXT: s_lshr_b32 s0, s0, s2
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
; GFX9-NEXT: s_lshr_b32 s0, s0, s1
; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
; GFX9-NEXT: ; return to shader part epilog
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s2, s1, 15
; GFX10-NEXT: s_andn2_b32 s1, 15, s1
-; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX10-NEXT: s_bfe_u32 s3, 1, 0x100000
+; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0
-; GFX10-NEXT: s_lshr_b32 s0, s0, s3
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_lshr_b32 s0, s0, 1
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: s_lshr_b32 s0, s0, s1
; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
; GFX10-NEXT: ; return to shader part epilog
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s2, s1, 15
; GFX11-NEXT: s_and_not1_b32 s1, 15, s1
-; GFX11-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX11-NEXT: s_bfe_u32 s3, 1, 0x100000
+; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
; GFX11-NEXT: v_lshlrev_b16 v0, s2, v0
-; GFX11-NEXT: s_lshr_b32 s0, s0, s3
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_lshr_b32 s0, s0, 1
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_lshr_b32 s0, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s6, s4, 15
; GFX6-NEXT: s_andn2_b32 s4, 15, s4
-; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: s_bfe_u32 s2, s2, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_lshl_b32 s0, s0, s6
; GFX6-NEXT: s_lshr_b32 s2, s2, s4
; GFX6-NEXT: s_or_b32 s0, s0, s2
; GFX6-NEXT: s_and_b32 s2, s5, 15
; GFX6-NEXT: s_andn2_b32 s4, 15, s5
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, s2
; GFX6-NEXT: s_bfe_u32 s2, s3, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s3, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s3, 0xffff, s4
; GFX6-NEXT: s_lshr_b32 s2, s2, s3
; GFX6-NEXT: s_or_b32 s1, s1, s2
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshl_v2i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_and_b32 s6, s2, 15
-; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
-; GFX8-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s1, 16
; GFX8-NEXT: s_lshr_b32 s5, s2, 16
+; GFX8-NEXT: s_and_b32 s6, s2, 15
; GFX8-NEXT: s_andn2_b32 s2, 15, s2
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
+; GFX8-NEXT: s_lshr_b32 s1, s1, 1
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-NEXT: s_lshl_b32 s0, s0, s6
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s6, 1, 0x100000
-; GFX8-NEXT: s_lshr_b32 s1, s1, s6
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
; GFX8-NEXT: s_lshr_b32 s1, s1, s2
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: s_and_b32 s1, s5, 15
; GFX8-NEXT: s_andn2_b32 s2, 15, s5
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s1, s3, s1
-; GFX8-NEXT: s_lshr_b32 s3, s4, s6
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_lshr_b32 s3, s4, 1
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshr_b32 s2, s3, s2
; GFX8-NEXT: s_or_b32 s1, s1, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
; GFX6-NEXT: v_and_b32_e32 v6, 15, v4
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
-; GFX6-NEXT: v_bfe_u32 v6, v6, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
-; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_and_b32_e32 v2, 15, v5
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
-; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
-; GFX6-NEXT: v_bfe_u32 v3, v4, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_lshrrev_b16_e32 v1, v3, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX8-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-LABEL: v_fshl_v2i16_4_8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_bfe_u32 s4, 4, 0x100000
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0
; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
-; GFX6-NEXT: s_bfe_u32 s4, 11, 0x100000
-; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2
-; GFX6-NEXT: s_bfe_u32 s4, 8, 0x100000
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 11, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1
; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
-; GFX6-NEXT: s_bfe_u32 s4, 7, 0x100000
-; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 7, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2
; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-NEXT: v_and_b32_e32 v2, 15, v0
; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001
-; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
; GFX6-NEXT: v_and_b32_e32 v2, 15, v1
; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX6-NEXT: v_and_b32_e32 v1, 15, v1
-; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001
-; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2
; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
-; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
-; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
; GFX8: ; %bb.0:
; GFX8-NEXT: v_and_b32_e32 v2, 15, v0
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
-; GFX8-NEXT: s_lshr_b32 s3, s1, 16
; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s0
-; GFX8-NEXT: s_bfe_u32 s0, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s1, 1, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s1
; GFX8-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX8-NEXT: s_lshr_b32 s0, s0, s1
+; GFX8-NEXT: s_lshr_b32 s0, s0, 1
; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0
+; GFX8-NEXT: s_lshr_b32 s3, s1, 16
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: v_and_b32_e32 v2, 15, v1
; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX8-NEXT: v_and_b32_e32 v1, 15, v1
-; GFX8-NEXT: s_lshr_b32 s0, s3, s1
+; GFX8-NEXT: s_lshr_b32 s0, s3, 1
; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2
; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s0
; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: ; return to shader part epilog
;
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s4, s2, 15
; GFX6-NEXT: s_andn2_b32 s2, 15, s2
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: s_lshl_b32 s0, s0, s4
; GFX6-NEXT: v_lshrrev_b32_e32 v0, s2, v0
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
; GFX6-NEXT: s_and_b32 s0, s3, 15
; GFX6-NEXT: s_andn2_b32 s2, 15, s3
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: s_lshl_b32 s0, s1, s0
; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15
-; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
; GFX6-NEXT: v_lshrrev_b32_e32 v1, s1, v1
; GFX6-NEXT: v_or_b32_e32 v1, s0, v1
-; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
-; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
; GFX8-NEXT: s_and_b32 s4, s1, 15
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
; GFX8-NEXT: s_andn2_b32 s1, 15, s1
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v0
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_lshl_b32 s0, s0, s4
; GFX8-NEXT: s_and_b32 s0, s3, 15
; GFX8-NEXT: v_mov_b32_e32 v2, 1
; GFX8-NEXT: s_andn2_b32 s1, 15, s3
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: s_lshl_b32 s0, s2, s0
; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0
; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: ; return to shader part epilog
;
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s4, s2, 15
; GFX6-NEXT: s_andn2_b32 s2, 15, s2
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0
; GFX6-NEXT: s_lshr_b32 s0, s0, s2
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
; GFX6-NEXT: s_and_b32 s0, s3, 15
; GFX6-NEXT: s_andn2_b32 s2, 15, s3
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1
; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
; GFX6-NEXT: s_lshr_b32 s0, s0, s1
; GFX6-NEXT: v_or_b32_e32 v1, s0, v1
-; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
-; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fshl_v2i16_vss:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_and_b32 s4, s1, 15
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
+; GFX8-NEXT: s_and_b32 s4, s1, 15
; GFX8-NEXT: s_andn2_b32 s1, 15, s1
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX8-NEXT: s_lshr_b32 s0, s0, 1
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: v_lshlrev_b16_e32 v1, s4, v0
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000
-; GFX8-NEXT: s_lshr_b32 s0, s0, s4
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
; GFX8-NEXT: s_lshr_b32 s0, s0, s1
; GFX8-NEXT: v_or_b32_e32 v1, s0, v1
; GFX8-NEXT: s_and_b32 s0, s3, 15
; GFX8-NEXT: s_andn2_b32 s1, 15, s3
; GFX8-NEXT: v_mov_b32_e32 v2, s0
-; GFX8-NEXT: s_lshr_b32 s0, s2, s4
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_lshr_b32 s0, s2, 1
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: s_lshr_b32 s0, s0, s1
; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: ; return to shader part epilog
;
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s9, s6, 15
; GFX6-NEXT: s_andn2_b32 s6, 15, s6
-; GFX6-NEXT: s_bfe_u32 s9, s9, 0x100000
+; GFX6-NEXT: s_and_b32 s9, 0xffff, s9
; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: s_lshl_b32 s0, s0, s9
; GFX6-NEXT: s_lshr_b32 s3, s3, s6
; GFX6-NEXT: s_or_b32 s0, s0, s3
; GFX6-NEXT: s_and_b32 s3, s7, 15
; GFX6-NEXT: s_andn2_b32 s6, 15, s7
-; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
; GFX6-NEXT: s_lshl_b32 s1, s1, s3
; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s4, s6, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s6
; GFX6-NEXT: s_lshr_b32 s3, s3, s4
; GFX6-NEXT: s_or_b32 s1, s1, s3
; GFX6-NEXT: s_and_b32 s3, s8, 15
; GFX6-NEXT: s_andn2_b32 s4, 15, s8
-; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
; GFX6-NEXT: s_lshl_b32 s2, s2, s3
; GFX6-NEXT: s_bfe_u32 s3, s5, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_lshr_b32 s3, s3, s4
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
; GFX6-NEXT: s_or_b32 s2, s2, s3
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
-; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshl_v3i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_and_b32 s9, s4, 15
-; GFX8-NEXT: s_bfe_u32 s9, s9, 0x100000
-; GFX8-NEXT: s_lshr_b32 s6, s0, 16
; GFX8-NEXT: s_lshr_b32 s7, s2, 16
; GFX8-NEXT: s_lshr_b32 s8, s4, 16
+; GFX8-NEXT: s_and_b32 s9, s4, 15
; GFX8-NEXT: s_andn2_b32 s4, 15, s4
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT: s_and_b32 s9, 0xffff, s9
+; GFX8-NEXT: s_lshr_b32 s2, s2, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX8-NEXT: s_lshr_b32 s6, s0, 16
; GFX8-NEXT: s_lshl_b32 s0, s0, s9
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX8-NEXT: s_bfe_u32 s9, 1, 0x100000
-; GFX8-NEXT: s_lshr_b32 s2, s2, s9
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
; GFX8-NEXT: s_lshr_b32 s2, s2, s4
; GFX8-NEXT: s_or_b32 s0, s0, s2
; GFX8-NEXT: s_and_b32 s2, s8, 15
; GFX8-NEXT: s_andn2_b32 s4, 15, s8
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshl_b32 s2, s6, s2
-; GFX8-NEXT: s_lshr_b32 s6, s7, s9
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_lshr_b32 s6, s7, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshr_b32 s4, s6, s4
; GFX8-NEXT: s_or_b32 s2, s2, s4
; GFX8-NEXT: s_and_b32 s4, s5, 15
; GFX8-NEXT: s_andn2_b32 s5, 15, s5
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s1, s1, s4
-; GFX8-NEXT: s_lshr_b32 s3, s3, s9
-; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000
+; GFX8-NEXT: s_lshr_b32 s3, s3, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
; GFX8-NEXT: s_lshr_b32 s3, s3, s4
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_or_b32 s1, s1, s3
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_fshl_v3i16:
; GFX6-NEXT: v_and_b32_e32 v9, 15, v6
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
-; GFX6-NEXT: v_bfe_u32 v9, v9, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
-; GFX6-NEXT: v_bfe_u32 v6, v6, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
; GFX6-NEXT: v_and_b32_e32 v3, 15, v7
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
-; GFX6-NEXT: v_bfe_u32 v3, v3, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v1
; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15
-; GFX6-NEXT: v_bfe_u32 v4, v6, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
; GFX6-NEXT: v_and_b32_e32 v3, 15, v8
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v8
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
-; GFX6-NEXT: v_bfe_u32 v3, v3, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, v3, v2
; GFX6-NEXT: v_bfe_u32 v3, v5, 1, 15
-; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX8-NEXT: v_lshlrev_b16_e32 v1, v2, v1
; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v3
; GFX8-NEXT: v_lshrrev_b16_e32 v2, v5, v2
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshl_v3i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s12, s8, 15
; GFX6-NEXT: s_andn2_b32 s8, 15, s8
-; GFX6-NEXT: s_bfe_u32 s12, s12, 0x100000
+; GFX6-NEXT: s_and_b32 s12, 0xffff, s12
; GFX6-NEXT: s_bfe_u32 s4, s4, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000
+; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
; GFX6-NEXT: s_lshl_b32 s0, s0, s12
; GFX6-NEXT: s_lshr_b32 s4, s4, s8
; GFX6-NEXT: s_or_b32 s0, s0, s4
; GFX6-NEXT: s_and_b32 s4, s9, 15
; GFX6-NEXT: s_andn2_b32 s8, 15, s9
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, s4
; GFX6-NEXT: s_bfe_u32 s4, s5, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s5, s8, 0x100000
+; GFX6-NEXT: s_and_b32 s5, 0xffff, s8
; GFX6-NEXT: s_lshr_b32 s4, s4, s5
; GFX6-NEXT: s_or_b32 s1, s1, s4
; GFX6-NEXT: s_and_b32 s4, s10, 15
; GFX6-NEXT: s_andn2_b32 s5, 15, s10
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_lshl_b32 s2, s2, s4
; GFX6-NEXT: s_bfe_u32 s4, s6, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
; GFX6-NEXT: s_lshr_b32 s4, s4, s5
; GFX6-NEXT: s_or_b32 s2, s2, s4
; GFX6-NEXT: s_and_b32 s4, s11, 15
; GFX6-NEXT: s_andn2_b32 s5, 15, s11
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_lshl_b32 s3, s3, s4
; GFX6-NEXT: s_bfe_u32 s4, s7, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
; GFX6-NEXT: s_lshr_b32 s4, s4, s5
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
; GFX6-NEXT: s_or_b32 s3, s3, s4
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
-; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000
-; GFX6-NEXT: s_bfe_u32 s2, s3, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s3
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshl_v4i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_and_b32 s12, s4, 15
-; GFX8-NEXT: s_bfe_u32 s12, s12, 0x100000
-; GFX8-NEXT: s_lshr_b32 s6, s0, 16
; GFX8-NEXT: s_lshr_b32 s8, s2, 16
; GFX8-NEXT: s_lshr_b32 s10, s4, 16
+; GFX8-NEXT: s_and_b32 s12, s4, 15
; GFX8-NEXT: s_andn2_b32 s4, 15, s4
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT: s_and_b32 s12, 0xffff, s12
+; GFX8-NEXT: s_lshr_b32 s2, s2, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX8-NEXT: s_lshr_b32 s6, s0, 16
; GFX8-NEXT: s_lshl_b32 s0, s0, s12
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX8-NEXT: s_bfe_u32 s12, 1, 0x100000
-; GFX8-NEXT: s_lshr_b32 s2, s2, s12
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
; GFX8-NEXT: s_lshr_b32 s2, s2, s4
; GFX8-NEXT: s_or_b32 s0, s0, s2
; GFX8-NEXT: s_and_b32 s2, s10, 15
; GFX8-NEXT: s_andn2_b32 s4, 15, s10
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshl_b32 s2, s6, s2
-; GFX8-NEXT: s_lshr_b32 s6, s8, s12
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_lshr_b32 s6, s8, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshr_b32 s4, s6, s4
; GFX8-NEXT: s_or_b32 s2, s2, s4
; GFX8-NEXT: s_and_b32 s4, s5, 15
; GFX8-NEXT: s_lshr_b32 s9, s3, 16
; GFX8-NEXT: s_lshr_b32 s11, s5, 16
; GFX8-NEXT: s_andn2_b32 s5, 15, s5
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
; GFX8-NEXT: s_lshl_b32 s1, s1, s4
-; GFX8-NEXT: s_lshr_b32 s3, s3, s12
-; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000
+; GFX8-NEXT: s_lshr_b32 s3, s3, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
; GFX8-NEXT: s_lshr_b32 s3, s3, s4
; GFX8-NEXT: s_or_b32 s1, s1, s3
; GFX8-NEXT: s_and_b32 s3, s11, 15
; GFX8-NEXT: s_andn2_b32 s4, 15, s11
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
-; GFX8-NEXT: s_lshr_b32 s5, s9, s12
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
+; GFX8-NEXT: s_lshr_b32 s5, s9, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshl_b32 s3, s7, s3
; GFX8-NEXT: s_lshr_b32 s4, s5, s4
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_or_b32 s3, s3, s4
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s2, s3, 0x100000
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s1, s1, s2
; GFX8-NEXT: ; return to shader part epilog
; GFX6-NEXT: v_and_b32_e32 v12, 15, v8
; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
-; GFX6-NEXT: v_bfe_u32 v12, v12, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v12, 0xffff, v12
; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
-; GFX6-NEXT: v_bfe_u32 v8, v8, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v12, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
; GFX6-NEXT: v_and_b32_e32 v4, 15, v9
; GFX6-NEXT: v_xor_b32_e32 v8, -1, v9
; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
-; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
-; GFX6-NEXT: v_bfe_u32 v5, v8, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
; GFX6-NEXT: v_and_b32_e32 v4, 15, v10
; GFX6-NEXT: v_xor_b32_e32 v5, -1, v10
; GFX6-NEXT: v_and_b32_e32 v5, 15, v5
-; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v2, v4, v2
; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15
-; GFX6-NEXT: v_bfe_u32 v5, v5, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
; GFX6-NEXT: v_and_b32_e32 v4, 15, v11
; GFX6-NEXT: v_xor_b32_e32 v5, -1, v11
; GFX6-NEXT: v_and_b32_e32 v5, 15, v5
-; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15
-; GFX6-NEXT: v_bfe_u32 v5, v5, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX8-NEXT: v_lshrrev_b16_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_lshrrev_b16_e32 v3, v6, v3
; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v5, 63, v4
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX6-NEXT: v_not_b32_e32 v4, v4
; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], 1
; GFX6-NEXT: v_and_b32_e32 v4, 63, v4
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v5
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v5, 63, v4
-; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX8-NEXT: v_not_b32_e32 v4, v4
; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
; GFX8-NEXT: v_and_b32_e32 v4, 63, v4
; GFX8-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1]
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v5, 63, v4
-; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX9-NEXT: v_not_b32_e32 v4, v4
; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
; GFX9-NEXT: v_and_b32_e32 v4, 63, v4
; GFX9-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1]
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_xor_b32_e32 v5, -1, v4
+; GFX10-NEXT: v_not_b32_e32 v5, v4
; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
; GFX10-NEXT: v_and_b32_e32 v4, 63, v4
; GFX10-NEXT: v_and_b32_e32 v5, 63, v5
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_xor_b32_e32 v5, -1, v4
+; GFX11-NEXT: v_not_b32_e32 v5, v4
; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
; GFX11-NEXT: v_and_b32_e32 v4, 63, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX6-LABEL: v_fshl_i64_ssv:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_and_b32_e32 v1, 63, v0
-; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v0, v0
; GFX6-NEXT: v_and_b32_e32 v2, 63, v0
; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v1
; GFX6-NEXT: s_lshr_b64 s[0:1], s[2:3], 1
; GFX8-LABEL: v_fshl_i64_ssv:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_and_b32_e32 v1, 63, v0
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: v_and_b32_e32 v2, 63, v0
; GFX8-NEXT: v_lshlrev_b64 v[0:1], v1, s[0:1]
; GFX8-NEXT: s_lshr_b64 s[0:1], s[2:3], 1
; GFX9-LABEL: v_fshl_i64_ssv:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v1, 63, v0
-; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v0, v0
; GFX9-NEXT: v_and_b32_e32 v2, 63, v0
; GFX9-NEXT: v_lshlrev_b64 v[0:1], v1, s[0:1]
; GFX9-NEXT: s_lshr_b64 s[0:1], s[2:3], 1
;
; GFX10-LABEL: v_fshl_i64_ssv:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
+; GFX10-NEXT: v_not_b32_e32 v1, v0
; GFX10-NEXT: v_and_b32_e32 v0, 63, v0
; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
; GFX10-NEXT: v_and_b32_e32 v2, 63, v1
;
; GFX11-LABEL: v_fshl_i64_ssv:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0
+; GFX11-NEXT: v_not_b32_e32 v1, v0
; GFX11-NEXT: v_and_b32_e32 v0, 63, v0
; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v9, 63, v8
-; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX6-NEXT: v_not_b32_e32 v8, v8
; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], 1
; GFX6-NEXT: v_and_b32_e32 v8, 63, v8
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v9
; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], v8
-; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10
+; GFX6-NEXT: v_not_b32_e32 v8, v10
; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], 1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
; GFX6-NEXT: v_and_b32_e32 v4, 63, v10
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v9, 63, v8
-; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX8-NEXT: v_not_b32_e32 v8, v8
; GFX8-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX8-NEXT: v_and_b32_e32 v8, 63, v8
; GFX8-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1]
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5]
-; GFX8-NEXT: v_xor_b32_e32 v8, -1, v10
+; GFX8-NEXT: v_not_b32_e32 v8, v10
; GFX8-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7]
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: v_and_b32_e32 v4, 63, v10
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v9, 63, v8
-; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX9-NEXT: v_not_b32_e32 v8, v8
; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX9-NEXT: v_and_b32_e32 v8, 63, v8
; GFX9-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1]
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5]
-; GFX9-NEXT: v_xor_b32_e32 v8, -1, v10
+; GFX9-NEXT: v_not_b32_e32 v8, v10
; GFX9-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7]
; GFX9-NEXT: v_or_b32_e32 v0, v0, v4
; GFX9-NEXT: v_and_b32_e32 v4, 63, v10
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_xor_b32_e32 v9, -1, v8
-; GFX10-NEXT: v_xor_b32_e32 v11, -1, v10
+; GFX10-NEXT: v_not_b32_e32 v9, v8
+; GFX10-NEXT: v_not_b32_e32 v11, v10
; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX10-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7]
; GFX10-NEXT: v_and_b32_e32 v8, 63, v8
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_xor_b32_e32 v9, -1, v8
-; GFX11-NEXT: v_xor_b32_e32 v11, -1, v10
+; GFX11-NEXT: v_not_b32_e32 v9, v8
+; GFX11-NEXT: v_not_b32_e32 v11, v10
; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX11-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7]
; GFX11-NEXT: v_and_b32_e32 v8, 63, v8
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v14, 0x7f, v8
-; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX6-NEXT: v_not_b32_e32 v8, v8
; GFX6-NEXT: v_and_b32_e32 v15, 0x7f, v8
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v14
; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v14
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v14, 0x7f, v8
-; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX8-NEXT: v_not_b32_e32 v8, v8
; GFX8-NEXT: v_and_b32_e32 v15, 0x7f, v8
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v14
; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v14
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v14, 0x7f, v8
-; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX9-NEXT: v_not_b32_e32 v8, v8
; GFX9-NEXT: v_and_b32_e32 v15, 0x7f, v8
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v14
; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v14
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v8
-; GFX10-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX10-NEXT: v_not_b32_e32 v8, v8
; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX10-NEXT: v_lshrrev_b64 v[12:13], 1, v[6:7]
; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v18
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v8
-; GFX11-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX11-NEXT: v_not_b32_e32 v8, v8
; GFX11-NEXT: v_lshrrev_b64 v[12:13], 1, v[6:7]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v18
; GFX6-LABEL: v_fshl_i128_ssv:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_and_b32_e32 v6, 0x7f, v0
-; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v0, v0
; GFX6-NEXT: v_and_b32_e32 v7, 0x7f, v0
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v6
; GFX6-NEXT: v_lshr_b64 v[0:1], s[0:1], v0
; GFX8-LABEL: v_fshl_i128_ssv:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_and_b32_e32 v6, 0x7f, v0
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: v_and_b32_e32 v7, 0x7f, v0
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v6
; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1]
; GFX9-LABEL: v_fshl_i128_ssv:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v6, 0x7f, v0
-; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v0, v0
; GFX9-NEXT: v_and_b32_e32 v7, 0x7f, v0
; GFX9-NEXT: v_sub_u32_e32 v0, 64, v6
; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1]
; GFX10-LABEL: v_fshl_i128_ssv:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_b32_e32 v12, 0x7f, v0
-; GFX10-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX10-NEXT: v_not_b32_e32 v0, v0
; GFX10-NEXT: s_mov_b32 s8, 0
; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
; GFX10-NEXT: s_lshl_b32 s9, s6, 31
; GFX11-LABEL: v_fshl_i128_ssv:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_and_b32_e32 v12, 0x7f, v0
-; GFX11-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX11-NEXT: v_not_b32_e32 v0, v0
; GFX11-NEXT: s_mov_b32 s8, 0
; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
; GFX11-NEXT: s_lshl_b32 s9, s6, 31
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v23, 0x7f, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v16
+; GFX6-NEXT: v_not_b32_e32 v16, v16
; GFX6-NEXT: v_and_b32_e32 v24, 0x7f, v16
; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 64, v23
; GFX6-NEXT: v_subrev_i32_e32 v25, vcc, 64, v23
; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
; GFX6-NEXT: v_or_b32_e32 v0, v18, v0
; GFX6-NEXT: v_and_b32_e32 v18, 0x7f, v20
-; GFX6-NEXT: v_xor_b32_e32 v8, -1, v20
+; GFX6-NEXT: v_not_b32_e32 v8, v20
; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
; GFX6-NEXT: v_or_b32_e32 v1, v19, v1
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v23, 0x7f, v16
-; GFX8-NEXT: v_xor_b32_e32 v16, -1, v16
+; GFX8-NEXT: v_not_b32_e32 v16, v16
; GFX8-NEXT: v_and_b32_e32 v24, 0x7f, v16
; GFX8-NEXT: v_sub_u32_e32 v16, vcc, 64, v23
; GFX8-NEXT: v_subrev_u32_e32 v25, vcc, 64, v23
; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
; GFX8-NEXT: v_or_b32_e32 v0, v18, v0
; GFX8-NEXT: v_and_b32_e32 v18, 0x7f, v20
-; GFX8-NEXT: v_xor_b32_e32 v8, -1, v20
+; GFX8-NEXT: v_not_b32_e32 v8, v20
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
; GFX8-NEXT: v_or_b32_e32 v1, v19, v1
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v23, 0x7f, v16
-; GFX9-NEXT: v_xor_b32_e32 v16, -1, v16
+; GFX9-NEXT: v_not_b32_e32 v16, v16
; GFX9-NEXT: v_and_b32_e32 v24, 0x7f, v16
; GFX9-NEXT: v_sub_u32_e32 v16, 64, v23
; GFX9-NEXT: v_subrev_u32_e32 v25, 64, v23
; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
; GFX9-NEXT: v_or_b32_e32 v0, v18, v0
; GFX9-NEXT: v_and_b32_e32 v18, 0x7f, v20
-; GFX9-NEXT: v_xor_b32_e32 v8, -1, v20
+; GFX9-NEXT: v_not_b32_e32 v8, v20
; GFX9-NEXT: v_or_b32_e32 v1, v19, v1
; GFX9-NEXT: v_and_b32_e32 v19, 0x7f, v8
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v18
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v27, 0x7f, v16
-; GFX10-NEXT: v_xor_b32_e32 v16, -1, v16
+; GFX10-NEXT: v_not_b32_e32 v16, v16
; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9]
; GFX10-NEXT: v_sub_nc_u32_e32 v17, 64, v27
; GFX10-NEXT: v_and_b32_e32 v28, 0x7f, v16
; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v0, s4
; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, v1, s4
; GFX10-NEXT: v_or_b32_e32 v0, v21, v3
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v20
+; GFX10-NEXT: v_not_b32_e32 v3, v20
; GFX10-NEXT: v_or_b32_e32 v1, v22, v8
; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[12:13]
; GFX10-NEXT: v_sub_nc_u32_e32 v11, 64, v24
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_and_b32_e32 v27, 0x7f, v16
-; GFX11-NEXT: v_xor_b32_e32 v16, -1, v16
+; GFX11-NEXT: v_not_b32_e32 v16, v16
; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1]
; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v0, s0
; GFX11-NEXT: v_cndmask_b32_e64 v25, 0, v1, s0
; GFX11-NEXT: v_or_b32_e32 v0, v21, v3
-; GFX11-NEXT: v_xor_b32_e32 v3, -1, v20
+; GFX11-NEXT: v_not_b32_e32 v3, v20
; GFX11-NEXT: v_or_b32_e32 v1, v22, v8
; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[12:13]
; GFX11-NEXT: v_or_b32_e32 v2, v2, v10
; GFX8-NEXT: s_and_b32 s3, s2, 7
; GFX8-NEXT: s_andn2_b32 s2, 7, s2
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s0, s0, s2
; GFX8-NEXT: s_lshr_b32 s1, s1, s3
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: s_and_b32 s3, s2, 7
; GFX9-NEXT: s_andn2_b32 s2, 7, s2
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_lshl_b32 s0, s0, s2
; GFX9-NEXT: s_lshr_b32 s1, s1, s3
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: s_and_b32 s3, s2, 7
; GFX10-NEXT: s_andn2_b32 s2, 7, s2
; GFX10-NEXT: s_lshl_b32 s0, s0, 1
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: s_lshl_b32 s0, s0, s2
; GFX10-NEXT: s_lshr_b32 s1, s1, s3
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX11-NEXT: s_and_b32 s3, s2, 7
; GFX11-NEXT: s_and_not1_b32 s2, 7, s2
; GFX11-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: s_lshl_b32 s0, s0, s2
; GFX11-NEXT: s_lshr_b32 s1, s1, s3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v3, 7, v2
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v2, v2
; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v3, 7, v2
-; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX8-NEXT: v_not_b32_e32 v2, v2
; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v3, 7, v2
-; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX9-NEXT: v_not_b32_e32 v2, v2
; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v3, v2
; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v3, v2
; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX8-LABEL: s_fshr_i8_4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s0, s0, 4
; GFX8-NEXT: s_lshr_b32 s1, s1, 4
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX9-LABEL: s_fshr_i8_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_lshl_b32 s0, s0, 4
; GFX9-NEXT: s_lshr_b32 s1, s1, 4
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s1, s1, 0xff
; GFX10-NEXT: s_lshl_b32 s0, s0, 4
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: s_lshr_b32 s1, s1, 4
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s1, s1, 0xff
; GFX11-NEXT: s_lshl_b32 s0, s0, 4
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_lshr_b32 s1, s1, 4
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX8-LABEL: s_fshr_i8_5:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s0, s0, 3
; GFX8-NEXT: s_lshr_b32 s1, s1, 5
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX9-LABEL: s_fshr_i8_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_lshl_b32 s0, s0, 3
; GFX9-NEXT: s_lshr_b32 s1, s1, 5
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s1, s1, 0xff
; GFX10-NEXT: s_lshl_b32 s0, s0, 3
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: s_lshr_b32 s1, s1, 5
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s1, s1, 0xff
; GFX11-NEXT: s_lshl_b32 s0, s0, 3
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_lshr_b32 s1, s1, 5
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_andn2_b32 s2, 7, s5
; GFX8-NEXT: s_lshl_b32 s3, s3, 1
; GFX8-NEXT: s_lshr_b32 s1, s1, s6
; GFX8-NEXT: s_and_b32 s3, s4, 0xff
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: s_and_b32 s1, s5, 7
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshr_b32 s1, s3, s1
; GFX8-NEXT: s_or_b32 s1, s2, s1
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
-; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
; GFX9-NEXT: s_lshl_b32 s0, s0, s2
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_andn2_b32 s2, 7, s5
; GFX9-NEXT: s_lshl_b32 s3, s3, 1
; GFX9-NEXT: s_lshr_b32 s1, s1, s6
; GFX9-NEXT: s_and_b32 s3, s4, 0xff
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: s_and_b32 s1, s5, 7
-; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
; GFX9-NEXT: s_lshr_b32 s1, s3, s1
; GFX9-NEXT: s_or_b32 s1, s2, s1
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
-; GFX9-NEXT: s_bfe_u32 s2, 8, 0x100000
; GFX9-NEXT: s_and_b32 s0, s0, 0xff
-; GFX9-NEXT: s_lshl_b32 s1, s1, s2
+; GFX9-NEXT: s_lshl_b32 s1, s1, 8
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-NEXT: s_and_b32 s2, s5, 7
; GFX10-NEXT: s_andn2_b32 s5, 7, s5
; GFX10-NEXT: s_lshl_b32 s3, s3, 1
-; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: s_lshl_b32 s3, s3, s5
; GFX10-NEXT: s_lshr_b32 s2, s4, s2
; GFX10-NEXT: s_lshr_b32 s1, s1, s6
; GFX10-NEXT: s_or_b32 s2, s3, s2
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: s_and_b32 s1, s2, 0xff
-; GFX10-NEXT: s_bfe_u32 s2, 8, 0x100000
; GFX10-NEXT: s_and_b32 s0, s0, 0xff
-; GFX10-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10-NEXT: s_lshl_b32 s1, s1, 8
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-NEXT: s_and_b32 s2, s5, 7
; GFX11-NEXT: s_and_not1_b32 s5, 7, s5
; GFX11-NEXT: s_lshl_b32 s3, s3, 1
-; GFX11-NEXT: s_bfe_u32 s4, s4, 0x100000
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: s_lshl_b32 s3, s3, s5
; GFX11-NEXT: s_lshr_b32 s2, s4, s2
; GFX11-NEXT: s_lshr_b32 s1, s1, s6
; GFX11-NEXT: s_or_b32 s2, s3, s2
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX11-NEXT: s_and_b32 s1, s2, 0xff
-; GFX11-NEXT: s_bfe_u32 s2, 8, 0x100000
; GFX11-NEXT: s_and_b32 s0, s0, 0xff
-; GFX11-NEXT: s_lshl_b32 s1, s1, s2
+; GFX11-NEXT: s_lshl_b32 s1, s1, 8
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX11-NEXT: ; return to shader part epilog
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v2
; GFX6-NEXT: v_and_b32_e32 v5, 7, v2
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v2, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v5, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_and_b32_e32 v2, 7, v4
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX6-NEXT: v_not_b32_e32 v4, v4
; GFX6-NEXT: v_and_b32_e32 v4, 7, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
; GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2
; GFX8-NEXT: v_and_b32_e32 v6, 7, v2
-; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX8-NEXT: v_not_b32_e32 v2, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0
; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX8-NEXT: v_xor_b32_e32 v2, -1, v5
+; GFX8-NEXT: v_not_b32_e32 v2, v5
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_and_b32_e32 v1, 7, v5
; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2
; GFX9-NEXT: v_and_b32_e32 v6, 7, v2
-; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX9-NEXT: v_not_b32_e32 v2, v2
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0
; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX9-NEXT: v_xor_b32_e32 v2, -1, v5
+; GFX9-NEXT: v_not_b32_e32 v2, v5
; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
; GFX9-NEXT: v_and_b32_e32 v1, 7, v5
; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
; GFX10-NEXT: v_and_b32_e32 v7, 7, v2
-; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2
-; GFX10-NEXT: v_xor_b32_e32 v6, -1, v3
+; GFX10-NEXT: v_not_b32_e32 v2, v2
+; GFX10-NEXT: v_not_b32_e32 v6, v3
; GFX10-NEXT: v_and_b32_e32 v3, 7, v3
; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4
; GFX10-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
; GFX11-NEXT: v_and_b32_e32 v7, 7, v2
-; GFX11-NEXT: v_xor_b32_e32 v2, -1, v2
-; GFX11-NEXT: v_xor_b32_e32 v6, -1, v3
+; GFX11-NEXT: v_not_b32_e32 v2, v2
+; GFX11-NEXT: v_not_b32_e32 v6, v3
; GFX11-NEXT: v_and_b32_e32 v3, 7, v3
; GFX11-NEXT: v_lshlrev_b16 v4, 1, v4
; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_andn2_b32 s2, 7, s9
; GFX8-NEXT: s_lshl_b32 s3, s3, 1
; GFX8-NEXT: s_lshr_b32 s1, s1, s12
; GFX8-NEXT: s_and_b32 s3, s6, 0xff
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: s_and_b32 s1, s9, 7
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshr_b32 s1, s3, s1
; GFX8-NEXT: s_andn2_b32 s3, 7, s10
; GFX8-NEXT: s_lshl_b32 s4, s4, 1
; GFX8-NEXT: s_and_b32 s4, s7, 0xff
; GFX8-NEXT: s_or_b32 s1, s2, s1
; GFX8-NEXT: s_and_b32 s2, s10, 7
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshr_b32 s2, s4, s2
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
; GFX8-NEXT: s_or_b32 s2, s3, s2
; GFX9-NEXT: s_lshl_b32 s0, s0, 1
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
; GFX9-NEXT: s_lshl_b32 s0, s0, s2
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_andn2_b32 s2, 7, s9
; GFX9-NEXT: s_lshl_b32 s3, s3, 1
; GFX9-NEXT: s_lshr_b32 s1, s1, s12
; GFX9-NEXT: s_and_b32 s3, s6, 0xff
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: s_and_b32 s1, s9, 7
-; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
; GFX9-NEXT: s_lshr_b32 s1, s3, s1
; GFX9-NEXT: s_andn2_b32 s3, 7, s10
; GFX9-NEXT: s_lshl_b32 s4, s4, 1
; GFX9-NEXT: s_and_b32 s4, s7, 0xff
; GFX9-NEXT: s_or_b32 s1, s2, s1
; GFX9-NEXT: s_and_b32 s2, s10, 7
-; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX9-NEXT: s_and_b32 s4, 0xffff, s4
; GFX9-NEXT: s_lshr_b32 s2, s4, s2
; GFX9-NEXT: s_and_b32 s1, s1, 0xff
; GFX9-NEXT: s_or_b32 s2, s3, s2
; GFX10-NEXT: s_and_b32 s1, s1, 0xff
; GFX10-NEXT: s_lshl_b32 s0, s0, 1
; GFX10-NEXT: s_and_b32 s6, s6, 0xff
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: s_lshl_b32 s0, s0, s2
; GFX10-NEXT: s_and_b32 s2, s9, 7
; GFX10-NEXT: s_andn2_b32 s9, 7, s9
; GFX10-NEXT: s_lshl_b32 s3, s3, 1
-; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX10-NEXT: s_and_b32 s6, 0xffff, s6
; GFX10-NEXT: s_lshr_b32 s1, s1, s12
; GFX10-NEXT: s_lshl_b32 s3, s3, s9
; GFX10-NEXT: s_lshr_b32 s2, s6, s2
; GFX10-NEXT: s_and_b32 s2, s10, 7
; GFX10-NEXT: s_andn2_b32 s3, 7, s10
; GFX10-NEXT: s_lshl_b32 s4, s4, 1
-; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX10-NEXT: s_and_b32 s6, 0xffff, s6
; GFX10-NEXT: s_lshl_b32 s3, s4, s3
; GFX10-NEXT: s_lshr_b32 s2, s6, s2
; GFX10-NEXT: s_andn2_b32 s4, 7, s11
; GFX11-NEXT: s_and_b32 s1, s1, 0xff
; GFX11-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-NEXT: s_and_b32 s6, s6, 0xff
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: s_lshl_b32 s0, s0, s2
; GFX11-NEXT: s_and_b32 s2, s9, 7
; GFX11-NEXT: s_and_not1_b32 s9, 7, s9
; GFX11-NEXT: s_lshl_b32 s3, s3, 1
-; GFX11-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX11-NEXT: s_and_b32 s6, 0xffff, s6
; GFX11-NEXT: s_lshr_b32 s1, s1, s12
; GFX11-NEXT: s_lshl_b32 s3, s3, s9
; GFX11-NEXT: s_lshr_b32 s2, s6, s2
; GFX11-NEXT: s_and_b32 s2, s10, 7
; GFX11-NEXT: s_and_not1_b32 s3, 7, s10
; GFX11-NEXT: s_lshl_b32 s4, s4, 1
-; GFX11-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX11-NEXT: s_and_b32 s6, 0xffff, s6
; GFX11-NEXT: s_lshl_b32 s3, s4, s3
; GFX11-NEXT: s_lshr_b32 s2, s6, s2
; GFX11-NEXT: s_and_not1_b32 s4, 7, s11
; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v9, 24, v2
; GFX6-NEXT: v_and_b32_e32 v10, 7, v2
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v2, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v10, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_and_b32_e32 v2, 7, v7
-; GFX6-NEXT: v_xor_b32_e32 v7, -1, v7
+; GFX6-NEXT: v_not_b32_e32 v7, v7
; GFX6-NEXT: v_and_b32_e32 v7, 7, v7
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v3, v7, v3
; GFX6-NEXT: v_bfe_u32 v7, v1, 8, 8
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v7
-; GFX6-NEXT: v_xor_b32_e32 v7, -1, v8
+; GFX6-NEXT: v_not_b32_e32 v7, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v6, 24, v1
; GFX6-NEXT: v_or_b32_e32 v2, v3, v2
; GFX6-NEXT: v_and_b32_e32 v3, 7, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v4, v7, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1
; GFX6-NEXT: v_or_b32_e32 v1, v4, v1
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v9
+; GFX6-NEXT: v_not_b32_e32 v4, v9
; GFX6-NEXT: v_and_b32_e32 v3, 7, v9
; GFX6-NEXT: v_and_b32_e32 v4, 7, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v2
; GFX8-NEXT: v_and_b32_e32 v8, 7, v2
-; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX8-NEXT: v_not_b32_e32 v2, v2
; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v0
; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v9
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX8-NEXT: v_or_b32_e32 v2, v2, v8
; GFX8-NEXT: v_and_b32_e32 v8, 7, v5
-; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5
+; GFX8-NEXT: v_not_b32_e32 v5, v5
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX8-NEXT: v_and_b32_e32 v5, 7, v5
; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3
; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_or_b32_e32 v3, v3, v4
; GFX8-NEXT: v_and_b32_e32 v4, 7, v6
-; GFX8-NEXT: v_xor_b32_e32 v5, -1, v6
+; GFX8-NEXT: v_not_b32_e32 v5, v6
; GFX8-NEXT: v_mov_b32_e32 v6, 1
-; GFX8-NEXT: v_mov_b32_e32 v9, 0xff
; GFX8-NEXT: v_and_b32_e32 v5, 7, v5
; GFX8-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_lshlrev_b16_e32 v5, v5, v8
-; GFX8-NEXT: v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v8, 0xff
+; GFX8-NEXT: v_and_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v8
; GFX8-NEXT: v_or_b32_e32 v4, v5, v4
; GFX8-NEXT: v_and_b32_e32 v5, 7, v7
-; GFX8-NEXT: v_xor_b32_e32 v7, -1, v7
+; GFX8-NEXT: v_not_b32_e32 v7, v7
; GFX8-NEXT: v_and_b32_e32 v7, 7, v7
; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
; GFX8-NEXT: v_lshlrev_b16_e32 v0, v7, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2
; GFX9-NEXT: v_and_b32_e32 v8, 7, v2
-; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX9-NEXT: v_not_b32_e32 v2, v2
; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
; GFX9-NEXT: v_lshlrev_b16_e32 v9, 1, v0
; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v9
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0
; GFX9-NEXT: v_or_b32_e32 v2, v2, v8
; GFX9-NEXT: v_and_b32_e32 v8, 7, v5
-; GFX9-NEXT: v_xor_b32_e32 v5, -1, v5
+; GFX9-NEXT: v_not_b32_e32 v5, v5
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
; GFX9-NEXT: v_and_b32_e32 v5, 7, v5
; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3
; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_or_b32_e32 v3, v3, v4
; GFX9-NEXT: v_and_b32_e32 v4, 7, v6
-; GFX9-NEXT: v_xor_b32_e32 v5, -1, v6
+; GFX9-NEXT: v_not_b32_e32 v5, v6
; GFX9-NEXT: v_mov_b32_e32 v6, 1
-; GFX9-NEXT: v_mov_b32_e32 v9, 0xff
+; GFX9-NEXT: s_movk_i32 s4, 0xff
; GFX9-NEXT: v_and_b32_e32 v5, 7, v5
; GFX9-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: v_lshlrev_b16_e32 v5, v5, v8
-; GFX9-NEXT: v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT: v_and_b32_sdwa v8, v1, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: v_lshrrev_b16_e32 v4, v4, v8
; GFX9-NEXT: v_or_b32_e32 v4, v5, v4
; GFX9-NEXT: v_and_b32_e32 v5, 7, v7
-; GFX9-NEXT: v_xor_b32_e32 v7, -1, v7
+; GFX9-NEXT: v_not_b32_e32 v7, v7
; GFX9-NEXT: v_and_b32_e32 v7, 7, v7
; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
; GFX9-NEXT: v_lshlrev_b16_e32 v0, v7, v0
; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 8
-; GFX9-NEXT: s_movk_i32 s4, 0xff
; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_and_or_b32 v1, v2, s4, v1
; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v4
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v2
+; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v2
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0
-; GFX10-NEXT: v_xor_b32_e32 v8, -1, v2
-; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v2
-; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v2
-; GFX10-NEXT: v_xor_b32_e32 v10, -1, v5
+; GFX10-NEXT: v_not_b32_e32 v8, v2
+; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2
+; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2
+; GFX10-NEXT: v_not_b32_e32 v12, v7
; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0
-; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v0
-; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v1
-; GFX10-NEXT: v_and_b32_e32 v10, 7, v10
-; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
+; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0
+; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1
+; GFX10-NEXT: v_and_b32_e32 v12, 7, v12
; GFX10-NEXT: v_and_b32_e32 v8, 7, v8
-; GFX10-NEXT: v_mov_b32_e32 v13, 0xff
-; GFX10-NEXT: v_xor_b32_e32 v14, -1, v12
-; GFX10-NEXT: v_lshlrev_b16 v3, v10, v3
-; GFX10-NEXT: v_xor_b32_e32 v10, -1, v11
+; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
+; GFX10-NEXT: v_not_b32_e32 v13, v10
+; GFX10-NEXT: s_movk_i32 s4, 0xff
+; GFX10-NEXT: v_lshlrev_b16 v3, v12, v3
+; GFX10-NEXT: v_not_b32_e32 v12, v11
; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1
; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0
; GFX10-NEXT: v_and_b32_e32 v8, 0xff, v1
-; GFX10-NEXT: v_and_b32_e32 v5, 7, v5
-; GFX10-NEXT: v_and_b32_e32 v7, 0xff, v7
-; GFX10-NEXT: v_and_b32_e32 v11, 7, v11
+; GFX10-NEXT: v_and_b32_e32 v7, 7, v7
+; GFX10-NEXT: v_and_b32_e32 v6, 0xff, v6
; GFX10-NEXT: v_and_b32_e32 v10, 7, v10
+; GFX10-NEXT: v_and_b32_e32 v13, 7, v13
; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4
-; GFX10-NEXT: v_and_b32_sdwa v1, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX10-NEXT: v_and_b32_e32 v13, 7, v14
-; GFX10-NEXT: v_lshlrev_b16 v6, 1, v6
+; GFX10-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-NEXT: v_and_b32_e32 v12, 7, v12
+; GFX10-NEXT: v_lshlrev_b16 v5, 1, v5
+; GFX10-NEXT: v_and_b32_e32 v11, 7, v11
; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
-; GFX10-NEXT: v_lshrrev_b16 v5, v5, v7
-; GFX10-NEXT: v_lshlrev_b16 v4, v10, v4
-; GFX10-NEXT: v_lshrrev_b16 v1, v11, v1
-; GFX10-NEXT: v_lshlrev_b16 v6, v13, v6
-; GFX10-NEXT: v_lshrrev_b16 v7, v12, v9
+; GFX10-NEXT: v_lshrrev_b16 v6, v7, v6
+; GFX10-NEXT: v_lshlrev_b16 v4, v13, v4
+; GFX10-NEXT: v_lshrrev_b16 v1, v10, v1
+; GFX10-NEXT: v_lshlrev_b16 v5, v12, v5
+; GFX10-NEXT: v_lshrrev_b16 v7, v11, v9
; GFX10-NEXT: v_lshrrev_b16 v2, v2, v8
-; GFX10-NEXT: v_or_b32_e32 v3, v3, v5
-; GFX10-NEXT: v_mov_b32_e32 v5, 8
+; GFX10-NEXT: v_or_b32_e32 v3, v3, v6
+; GFX10-NEXT: v_mov_b32_e32 v6, 8
; GFX10-NEXT: v_or_b32_e32 v1, v4, v1
-; GFX10-NEXT: v_or_b32_e32 v4, v6, v7
+; GFX10-NEXT: v_or_b32_e32 v4, v5, v7
; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v4
; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v2
; GFX11-NEXT: v_lshrrev_b32_e32 v13, 24, v2
; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
-; GFX11-NEXT: v_xor_b32_e32 v12, -1, v7
+; GFX11-NEXT: v_not_b32_e32 v12, v7
; GFX11-NEXT: v_and_b32_e32 v7, 7, v7
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v1
; GFX11-NEXT: v_and_b32_e32 v12, 7, v12
; GFX11-NEXT: v_lshlrev_b16 v3, 1, v3
-; GFX11-NEXT: v_xor_b32_e32 v14, -1, v11
+; GFX11-NEXT: v_not_b32_e32 v14, v11
; GFX11-NEXT: v_lshrrev_b16 v6, v7, v6
-; GFX11-NEXT: v_xor_b32_e32 v7, -1, v13
+; GFX11-NEXT: v_not_b32_e32 v7, v13
; GFX11-NEXT: v_lshrrev_b32_e32 v9, 24, v1
-; GFX11-NEXT: v_xor_b32_e32 v10, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v10, v2
; GFX11-NEXT: v_lshlrev_b16 v3, v12, v3
; GFX11-NEXT: v_and_b32_e32 v11, 7, v11
; GFX11-NEXT: v_and_b32_e32 v12, 7, v14
; GFX6-NEXT: s_lshl_b32 s2, s2, 8
; GFX6-NEXT: s_and_b32 s8, s8, 0xff
; GFX6-NEXT: s_or_b32 s2, s11, s2
-; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000
+; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
; GFX6-NEXT: s_lshr_b32 s10, s3, 8
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: s_lshl_b32 s8, s8, 16
; GFX6-NEXT: s_and_b32 s3, s3, 0xff
; GFX6-NEXT: s_or_b32 s2, s2, s8
; GFX6-NEXT: s_and_b32 s8, s10, 0xff
; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX6-NEXT: s_or_b32 s3, s9, s3
-; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000
-; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
+; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
; GFX6-NEXT: s_lshl_b32 s8, s8, 16
; GFX6-NEXT: s_or_b32 s3, s3, s8
; GFX6-NEXT: s_lshr_b32 s8, s4, 16
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
; GFX6-NEXT: s_or_b32 s4, s11, s4
-; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000
+; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_lshl_b32 s8, s8, 16
; GFX6-NEXT: s_or_b32 s4, s4, s8
; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0
; GFX6-NEXT: s_and_b32 s8, s10, 0xff
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX6-NEXT: s_or_b32 s5, s9, s5
-; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000
+; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0
-; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
; GFX6-NEXT: s_lshl_b32 s8, s8, 16
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX6-NEXT: s_or_b32 s5, s5, s8
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0
; GFX6-NEXT: s_lshl_b32 s4, s6, 17
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX6-NEXT: s_bfe_u32 s7, s7, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1
; GFX6-NEXT: s_lshl_b32 s0, s7, 17
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX8-NEXT: s_lshr_b32 s9, s1, 8
-; GFX8-NEXT: s_bfe_u32 s10, 8, 0x100000
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
; GFX8-NEXT: s_lshr_b32 s6, s0, 8
; GFX8-NEXT: s_lshr_b32 s8, s0, 24
-; GFX8-NEXT: s_lshl_b32 s1, s1, s10
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX8-NEXT: s_and_b32 s6, s6, 0xff
; GFX8-NEXT: s_or_b32 s1, s8, s1
; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX8-NEXT: s_lshr_b32 s7, s0, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
-; GFX8-NEXT: s_lshl_b32 s6, s6, s10
+; GFX8-NEXT: s_lshl_b32 s6, s6, 8
; GFX8-NEXT: s_and_b32 s8, s8, 0xff
; GFX8-NEXT: s_or_b32 s0, s0, s6
; GFX8-NEXT: s_and_b32 s6, s7, 0xff
; GFX8-NEXT: s_and_b32 s7, s9, 0xff
; GFX8-NEXT: s_lshr_b32 s9, s2, 16
-; GFX8-NEXT: s_lshr_b32 s11, s2, 24
+; GFX8-NEXT: s_lshr_b32 s10, s2, 24
; GFX8-NEXT: s_and_b32 s2, s2, 0xff
-; GFX8-NEXT: s_lshl_b32 s8, s8, s10
+; GFX8-NEXT: s_lshl_b32 s8, s8, 8
; GFX8-NEXT: s_or_b32 s2, s2, s8
; GFX8-NEXT: s_and_b32 s8, s9, 0xff
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
-; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000
+; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
-; GFX8-NEXT: s_lshr_b32 s12, s3, 8
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_lshr_b32 s11, s3, 8
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshl_b32 s8, s8, 16
; GFX8-NEXT: s_and_b32 s3, s3, 0xff
; GFX8-NEXT: s_or_b32 s2, s2, s8
-; GFX8-NEXT: s_lshl_b32 s3, s3, s10
-; GFX8-NEXT: s_and_b32 s8, s12, 0xff
-; GFX8-NEXT: s_or_b32 s3, s11, s3
-; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_lshl_b32 s3, s3, 8
+; GFX8-NEXT: s_and_b32 s8, s11, 0xff
+; GFX8-NEXT: s_or_b32 s3, s10, s3
+; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s8, s8, 16
; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX8-NEXT: s_or_b32 s3, s3, s8
; GFX8-NEXT: s_lshr_b32 s8, s4, 8
; GFX8-NEXT: s_and_b32 s8, s8, 0xff
; GFX8-NEXT: s_lshr_b32 s9, s4, 16
-; GFX8-NEXT: s_lshr_b32 s11, s4, 24
+; GFX8-NEXT: s_lshr_b32 s10, s4, 24
; GFX8-NEXT: s_and_b32 s4, s4, 0xff
-; GFX8-NEXT: s_lshl_b32 s8, s8, s10
+; GFX8-NEXT: s_lshl_b32 s8, s8, 8
; GFX8-NEXT: s_or_b32 s4, s4, s8
; GFX8-NEXT: s_and_b32 s8, s9, 0xff
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
-; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000
+; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshl_b32 s8, s8, 16
; GFX8-NEXT: s_or_b32 s4, s4, s8
; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0
; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX8-NEXT: s_lshr_b32 s12, s5, 8
+; GFX8-NEXT: s_lshr_b32 s11, s5, 8
; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX8-NEXT: s_and_b32 s5, s5, 0xff
; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1
-; GFX8-NEXT: s_lshl_b32 s5, s5, s10
+; GFX8-NEXT: s_lshl_b32 s5, s5, 8
; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0
; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1
-; GFX8-NEXT: s_and_b32 s8, s12, 0xff
+; GFX8-NEXT: s_and_b32 s8, s11, 0xff
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT: s_or_b32 s5, s11, s5
-; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000
+; GFX8-NEXT: s_or_b32 s5, s10, s5
+; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0
-; GFX8-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
; GFX8-NEXT: s_lshl_b32 s8, s8, 16
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
; GFX8-NEXT: s_or_b32 s5, s5, s8
; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0
; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24
; GFX8-NEXT: s_lshl_b32 s4, s6, 17
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_and_b32 s7, 0xffff, s7
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1
; GFX8-NEXT: s_lshl_b32 s0, s7, 17
; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX9-NEXT: s_lshr_b32 s11, s1, 8
-; GFX9-NEXT: s_bfe_u32 s12, 8, 0x100000
+; GFX9-NEXT: s_and_b32 s1, s1, 0xff
; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX9-NEXT: s_and_b32 s1, s1, 0xff
; GFX9-NEXT: s_lshr_b32 s7, s0, 8
; GFX9-NEXT: s_lshr_b32 s10, s0, 24
+; GFX9-NEXT: s_lshl_b32 s1, s1, 8
; GFX9-NEXT: v_mul_lo_u32 v2, v0, v1
-; GFX9-NEXT: s_lshl_b32 s1, s1, s12
; GFX9-NEXT: s_and_b32 s7, s7, 0xff
; GFX9-NEXT: s_or_b32 s1, s10, s1
-; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX9-NEXT: s_lshr_b32 s10, s2, 8
+; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX9-NEXT: s_lshr_b32 s9, s0, 16
; GFX9-NEXT: s_and_b32 s0, s0, 0xff
-; GFX9-NEXT: s_lshl_b32 s7, s7, s12
+; GFX9-NEXT: s_lshl_b32 s7, s7, 8
; GFX9-NEXT: s_and_b32 s10, s10, 0xff
; GFX9-NEXT: s_or_b32 s0, s0, s7
; GFX9-NEXT: s_and_b32 s7, s9, 0xff
; GFX9-NEXT: s_and_b32 s9, s11, 0xff
; GFX9-NEXT: s_lshr_b32 s11, s2, 16
-; GFX9-NEXT: s_lshr_b32 s13, s2, 24
+; GFX9-NEXT: s_lshr_b32 s12, s2, 24
; GFX9-NEXT: s_and_b32 s2, s2, 0xff
-; GFX9-NEXT: s_lshl_b32 s10, s10, s12
+; GFX9-NEXT: s_lshl_b32 s10, s10, 8
; GFX9-NEXT: s_or_b32 s2, s2, s10
; GFX9-NEXT: s_and_b32 s10, s11, 0xff
; GFX9-NEXT: v_add_u32_e32 v0, v0, v2
; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
-; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000
+; GFX9-NEXT: s_and_b32 s10, 0xffff, s10
; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2
-; GFX9-NEXT: s_lshr_b32 s14, s3, 8
-; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX9-NEXT: s_lshr_b32 s13, s3, 8
+; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
; GFX9-NEXT: s_lshl_b32 s10, s10, 16
; GFX9-NEXT: s_and_b32 s3, s3, 0xff
; GFX9-NEXT: s_or_b32 s2, s2, s10
-; GFX9-NEXT: s_lshl_b32 s3, s3, s12
-; GFX9-NEXT: s_and_b32 s10, s14, 0xff
-; GFX9-NEXT: s_or_b32 s3, s13, s3
-; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000
-; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX9-NEXT: s_lshl_b32 s3, s3, 8
+; GFX9-NEXT: s_and_b32 s10, s13, 0xff
+; GFX9-NEXT: s_or_b32 s3, s12, s3
+; GFX9-NEXT: s_and_b32 s10, 0xffff, s10
+; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
; GFX9-NEXT: s_lshl_b32 s10, s10, 16
; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX9-NEXT: s_or_b32 s3, s3, s10
; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
; GFX9-NEXT: s_and_b32 s10, s10, 0xff
; GFX9-NEXT: s_lshr_b32 s11, s4, 16
-; GFX9-NEXT: s_lshr_b32 s13, s4, 24
+; GFX9-NEXT: s_lshr_b32 s12, s4, 24
; GFX9-NEXT: s_and_b32 s4, s4, 0xff
-; GFX9-NEXT: s_lshl_b32 s10, s10, s12
+; GFX9-NEXT: s_lshl_b32 s10, s10, 8
; GFX9-NEXT: s_or_b32 s4, s4, s10
; GFX9-NEXT: s_and_b32 s10, s11, 0xff
-; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000
+; GFX9-NEXT: s_and_b32 s10, 0xffff, s10
; GFX9-NEXT: v_mul_lo_u32 v1, v2, v1
-; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX9-NEXT: s_and_b32 s4, 0xffff, s4
; GFX9-NEXT: s_lshl_b32 s10, s10, 16
; GFX9-NEXT: s_or_b32 s4, s4, s10
; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0
-; GFX9-NEXT: s_lshr_b32 s14, s5, 8
+; GFX9-NEXT: s_lshr_b32 s13, s5, 8
; GFX9-NEXT: s_and_b32 s5, s5, 0xff
; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1
-; GFX9-NEXT: s_lshl_b32 s5, s5, s12
-; GFX9-NEXT: s_and_b32 s10, s14, 0xff
-; GFX9-NEXT: s_or_b32 s5, s13, s5
-; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000
-; GFX9-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX9-NEXT: s_lshl_b32 s5, s5, 8
+; GFX9-NEXT: s_and_b32 s10, s13, 0xff
+; GFX9-NEXT: s_or_b32 s5, s12, s5
+; GFX9-NEXT: s_and_b32 s10, 0xffff, s10
+; GFX9-NEXT: s_and_b32 s5, 0xffff, s5
; GFX9-NEXT: s_lshl_b32 s10, s10, 16
; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX9-NEXT: s_or_b32 s5, s5, s10
; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24
; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
-; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
+; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT: v_sub_u32_e32 v3, 23, v0
; GFX9-NEXT: s_lshl_b32 s4, s7, 17
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX9-NEXT: s_bfe_u32 s9, s9, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX9-NEXT: s_and_b32 s9, 0xffff, s9
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1
; GFX9-NEXT: s_lshl_b32 s0, s9, 17
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24
; GFX10-NEXT: s_lshr_b32 s9, s1, 8
-; GFX10-NEXT: s_bfe_u32 s10, 8, 0x100000
; GFX10-NEXT: s_and_b32 s1, s1, 0xff
+; GFX10-NEXT: s_lshr_b32 s6, s0, 8
; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; GFX10-NEXT: s_lshr_b32 s6, s0, 8
; GFX10-NEXT: s_lshr_b32 s8, s0, 24
-; GFX10-NEXT: s_lshl_b32 s1, s1, s10
+; GFX10-NEXT: s_lshl_b32 s1, s1, 8
; GFX10-NEXT: s_and_b32 s6, s6, 0xff
; GFX10-NEXT: s_or_b32 s1, s8, s1
; GFX10-NEXT: s_lshr_b32 s8, s4, 8
; GFX10-NEXT: s_lshr_b32 s7, s0, 16
; GFX10-NEXT: s_and_b32 s0, s0, 0xff
+; GFX10-NEXT: s_lshl_b32 s6, s6, 8
; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
-; GFX10-NEXT: s_lshl_b32 s6, s6, s10
; GFX10-NEXT: s_and_b32 s8, s8, 0xff
; GFX10-NEXT: s_or_b32 s0, s0, s6
+; GFX10-NEXT: s_and_b32 s6, s7, 0xff
; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX10-NEXT: s_and_b32 s6, s7, 0xff
; GFX10-NEXT: s_and_b32 s7, s9, 0xff
; GFX10-NEXT: s_lshr_b32 s9, s4, 16
+; GFX10-NEXT: s_lshr_b32 s10, s4, 24
; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0
; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1
-; GFX10-NEXT: s_lshr_b32 s11, s4, 24
; GFX10-NEXT: s_and_b32 s4, s4, 0xff
-; GFX10-NEXT: s_lshl_b32 s8, s8, s10
-; GFX10-NEXT: s_lshr_b32 s12, s5, 8
+; GFX10-NEXT: s_lshl_b32 s8, s8, 8
+; GFX10-NEXT: s_lshr_b32 s11, s5, 8
; GFX10-NEXT: s_or_b32 s4, s4, s8
; GFX10-NEXT: s_and_b32 s8, s9, 0xff
+; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3
-; GFX10-NEXT: s_bfe_u32 s8, s8, 0x100000
-; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX10-NEXT: s_and_b32 s8, 0xffff, s8
; GFX10-NEXT: s_and_b32 s5, s5, 0xff
; GFX10-NEXT: s_lshl_b32 s8, s8, 16
-; GFX10-NEXT: s_lshl_b32 s5, s5, s10
+; GFX10-NEXT: s_lshl_b32 s5, s5, 8
; GFX10-NEXT: s_or_b32 s4, s4, s8
+; GFX10-NEXT: s_and_b32 s8, s11, 0xff
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2
-; GFX10-NEXT: s_and_b32 s8, s12, 0xff
-; GFX10-NEXT: s_or_b32 s5, s11, s5
-; GFX10-NEXT: s_bfe_u32 s8, s8, 0x100000
+; GFX10-NEXT: s_or_b32 s5, s10, s5
+; GFX10-NEXT: s_and_b32 s8, 0xffff, s8
; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3
+; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0
-; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000
; GFX10-NEXT: s_lshl_b32 s8, s8, 16
; GFX10-NEXT: s_lshr_b32 s9, s2, 8
; GFX10-NEXT: s_or_b32 s5, s5, s8
; GFX10-NEXT: s_lshr_b32 s8, s2, 16
; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1
; GFX10-NEXT: s_and_b32 s9, s9, 0xff
+; GFX10-NEXT: s_lshr_b32 s10, s2, 24
; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
-; GFX10-NEXT: s_lshr_b32 s11, s2, 24
-; GFX10-NEXT: s_lshr_b32 s12, s3, 8
+; GFX10-NEXT: s_lshr_b32 s11, s3, 8
; GFX10-NEXT: s_and_b32 s2, s2, 0xff
-; GFX10-NEXT: s_lshl_b32 s9, s9, s10
+; GFX10-NEXT: s_lshl_b32 s9, s9, 8
; GFX10-NEXT: s_and_b32 s8, s8, 0xff
; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24
; GFX10-NEXT: s_and_b32 s3, s3, 0xff
-; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0
; GFX10-NEXT: s_or_b32 s2, s2, s9
-; GFX10-NEXT: s_bfe_u32 s4, s8, 0x100000
-; GFX10-NEXT: s_lshl_b32 s3, s3, s10
-; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0
+; GFX10-NEXT: s_and_b32 s4, 0xffff, s8
+; GFX10-NEXT: s_lshl_b32 s3, s3, 8
+; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX10-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
-; GFX10-NEXT: s_and_b32 s5, s12, 0xff
-; GFX10-NEXT: s_lshl_b32 s4, s4, 16
-; GFX10-NEXT: s_or_b32 s3, s11, s3
-; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX10-NEXT: s_and_b32 s5, s11, 0xff
+; GFX10-NEXT: s_or_b32 s3, s10, s3
+; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
+; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
-; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000
; GFX10-NEXT: s_lshl_b32 s5, s5, 16
-; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
; GFX10-NEXT: s_or_b32 s2, s2, s4
+; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
+; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
-; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX10-NEXT: s_and_b32 s6, 0xffff, s6
; GFX10-NEXT: s_or_b32 s3, s3, s5
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX10-NEXT: s_bfe_u32 s7, s7, 0x100000
+; GFX10-NEXT: s_and_b32 s7, 0xffff, s7
; GFX10-NEXT: s_lshl_b32 s4, s6, 17
+; GFX10-NEXT: s_lshl_b32 s0, s0, 1
; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0
; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0
-; GFX10-NEXT: s_lshl_b32 s0, s0, 1
+; GFX10-NEXT: s_or_b32 s0, s4, s0
; GFX10-NEXT: s_lshl_b32 s1, s1, 1
; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v1
; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s2
-; GFX10-NEXT: s_or_b32 s0, s4, s0
; GFX10-NEXT: s_lshl_b32 s2, s7, 17
; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s3
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, 24
; GFX11-NEXT: s_lshr_b32 s6, s0, 8
-; GFX11-NEXT: s_bfe_u32 s9, 8, 0x100000
+; GFX11-NEXT: s_lshr_b32 s7, s0, 16
; GFX11-NEXT: s_and_b32 s6, s6, 0xff
; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v1
-; GFX11-NEXT: s_lshr_b32 s7, s0, 16
; GFX11-NEXT: s_lshr_b32 s8, s0, 24
; GFX11-NEXT: s_and_b32 s0, s0, 0xff
-; GFX11-NEXT: s_lshl_b32 s6, s6, s9
-; GFX11-NEXT: s_lshr_b32 s10, s1, 8
+; GFX11-NEXT: s_lshl_b32 s6, s6, 8
+; GFX11-NEXT: s_lshr_b32 s9, s1, 8
; GFX11-NEXT: s_or_b32 s0, s0, s6
; GFX11-NEXT: s_and_b32 s6, s7, 0xff
-; GFX11-NEXT: s_and_b32 s7, s10, 0xff
+; GFX11-NEXT: s_and_b32 s7, s9, 0xff
+; GFX11-NEXT: s_lshr_b32 s9, s4, 8
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_dual_mul_f32 v0, 0x4f7ffffe, v0 :: v_dual_mul_f32 v1, 0x4f7ffffe, v1
-; GFX11-NEXT: s_lshr_b32 s10, s4, 8
-; GFX11-NEXT: s_lshr_b32 s11, s4, 16
-; GFX11-NEXT: s_and_b32 s10, s10, 0xff
+; GFX11-NEXT: s_lshr_b32 s10, s4, 16
+; GFX11-NEXT: s_and_b32 s9, s9, 0xff
+; GFX11-NEXT: s_and_b32 s11, s4, 0xff
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX11-NEXT: s_and_b32 s12, s4, 0xff
-; GFX11-NEXT: s_lshl_b32 s10, s10, s9
-; GFX11-NEXT: s_and_b32 s11, s11, 0xff
+; GFX11-NEXT: s_lshl_b32 s9, s9, 8
+; GFX11-NEXT: s_and_b32 s10, s10, 0xff
+; GFX11-NEXT: s_or_b32 s9, s11, s9
; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0
-; GFX11-NEXT: s_or_b32 s10, s12, s10
; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1
-; GFX11-NEXT: s_bfe_u32 s11, s11, 0x100000
-; GFX11-NEXT: s_bfe_u32 s10, s10, 0x100000
-; GFX11-NEXT: s_lshl_b32 s11, s11, 16
-; GFX11-NEXT: s_lshr_b32 s12, s5, 8
-; GFX11-NEXT: s_or_b32 s10, s10, s11
-; GFX11-NEXT: v_mul_hi_u32 v2, v0, v2
+; GFX11-NEXT: s_and_b32 s10, 0xffff, s10
+; GFX11-NEXT: s_and_b32 s9, 0xffff, s9
+; GFX11-NEXT: s_lshl_b32 s10, s10, 16
+; GFX11-NEXT: s_lshr_b32 s11, s5, 8
+; GFX11-NEXT: s_or_b32 s9, s9, s10
; GFX11-NEXT: s_and_b32 s5, s5, 0xff
+; GFX11-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX11-NEXT: s_lshr_b32 s4, s4, 24
-; GFX11-NEXT: s_lshl_b32 s5, s5, s9
-; GFX11-NEXT: s_and_b32 s11, s12, 0xff
+; GFX11-NEXT: s_lshl_b32 s5, s5, 8
+; GFX11-NEXT: s_and_b32 s10, s11, 0xff
; GFX11-NEXT: s_or_b32 s4, s4, s5
-; GFX11-NEXT: s_bfe_u32 s5, s11, 0x100000
-; GFX11-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX11-NEXT: s_and_b32 s5, 0xffff, s10
+; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX11-NEXT: s_lshl_b32 s5, s5, 16
; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2
; GFX11-NEXT: v_mul_hi_u32 v2, v1, v3
-; GFX11-NEXT: s_lshl_b32 s5, s5, 16
-; GFX11-NEXT: s_and_b32 s1, s1, 0xff
; GFX11-NEXT: s_or_b32 s4, s4, s5
-; GFX11-NEXT: v_mul_hi_u32 v0, s10, v0
-; GFX11-NEXT: s_lshl_b32 s1, s1, s9
-; GFX11-NEXT: s_lshr_b32 s11, s2, 16
+; GFX11-NEXT: s_and_b32 s1, s1, 0xff
+; GFX11-NEXT: s_lshr_b32 s10, s2, 16
+; GFX11-NEXT: v_mul_hi_u32 v0, s9, v0
+; GFX11-NEXT: s_lshl_b32 s1, s1, 8
+; GFX11-NEXT: s_lshr_b32 s5, s2, 24
; GFX11-NEXT: s_or_b32 s1, s8, s1
; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2
; GFX11-NEXT: s_lshr_b32 s8, s2, 8
-; GFX11-NEXT: s_lshr_b32 s5, s2, 24
+; GFX11-NEXT: s_and_b32 s2, s2, 0xff
; GFX11-NEXT: s_and_b32 s8, s8, 0xff
; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
; GFX11-NEXT: v_mul_hi_u32 v1, s4, v1
-; GFX11-NEXT: s_and_b32 s2, s2, 0xff
-; GFX11-NEXT: s_lshl_b32 s8, s8, s9
-; GFX11-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX11-NEXT: s_lshl_b32 s8, s8, 8
+; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
; GFX11-NEXT: s_or_b32 s2, s2, s8
-; GFX11-NEXT: s_and_b32 s8, s11, 0xff
-; GFX11-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX11-NEXT: v_sub_nc_u32_e32 v0, s10, v0
+; GFX11-NEXT: s_and_b32 s8, s10, 0xff
+; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX11-NEXT: s_and_b32 s8, 0xffff, s8
+; GFX11-NEXT: v_sub_nc_u32_e32 v0, s9, v0
; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24
-; GFX11-NEXT: s_bfe_u32 s8, s8, 0x100000
-; GFX11-NEXT: s_lshr_b32 s10, s3, 8
+; GFX11-NEXT: s_lshr_b32 s9, s3, 8
; GFX11-NEXT: s_and_b32 s3, s3, 0xff
+; GFX11-NEXT: s_lshl_b32 s8, s8, 16
; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
-; GFX11-NEXT: s_lshl_b32 s8, s8, 16
-; GFX11-NEXT: s_lshl_b32 s3, s3, s9
+; GFX11-NEXT: s_lshl_b32 s3, s3, 8
+; GFX11-NEXT: s_or_b32 s2, s2, s8
; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1
-; GFX11-NEXT: s_and_b32 s4, s10, 0xff
+; GFX11-NEXT: s_and_b32 s4, s9, 0xff
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
-; GFX11-NEXT: s_or_b32 s2, s2, s8
-; GFX11-NEXT: s_bfe_u32 s6, s6, 0x100000
-; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
+; GFX11-NEXT: s_and_b32 s6, 0xffff, s6
; GFX11-NEXT: s_or_b32 s3, s5, s3
+; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
+; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
-; GFX11-NEXT: s_bfe_u32 s4, s4, 0x100000
-; GFX11-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX11-NEXT: s_and_b32 s3, 0xffff, s3
; GFX11-NEXT: s_lshl_b32 s4, s4, 16
; GFX11-NEXT: s_lshl_b32 s5, s6, 17
+; GFX11-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
-; GFX11-NEXT: s_lshl_b32 s0, s0, 1
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
; GFX11-NEXT: s_or_b32 s0, s5, s0
-; GFX11-NEXT: s_bfe_u32 s7, s7, 0x100000
-; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX11-NEXT: s_and_b32 s7, 0xffff, s7
; GFX11-NEXT: s_lshl_b32 s1, s1, 1
+; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
; GFX6-NEXT: s_and_b32 s3, s2, 15
; GFX6-NEXT: s_andn2_b32 s2, 15, s2
; GFX6-NEXT: s_lshl_b32 s0, s0, 1
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: s_lshl_b32 s0, s0, s2
-; GFX6-NEXT: s_bfe_u32 s2, s3, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s3
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
; GFX6-NEXT: s_lshr_b32 s1, s1, s2
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s3, s2, 15
; GFX8-NEXT: s_andn2_b32 s2, 15, s2
-; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s4
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_lshl_b32 s0, s0, 1
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s2, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
; GFX8-NEXT: s_lshr_b32 s1, s1, s2
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s3, s2, 15
; GFX9-NEXT: s_andn2_b32 s2, 15, s2
-; GFX9-NEXT: s_bfe_u32 s4, 1, 0x100000
-; GFX9-NEXT: s_lshl_b32 s0, s0, s4
-; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX9-NEXT: s_lshl_b32 s0, s0, 1
+; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
; GFX9-NEXT: s_lshl_b32 s0, s0, s2
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX9-NEXT: s_bfe_u32 s2, s3, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX9-NEXT: s_and_b32 s2, 0xffff, s3
; GFX9-NEXT: s_lshr_b32 s1, s1, s2
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
; GFX10-LABEL: s_fshr_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s3, s2, 15
-; GFX10-NEXT: s_bfe_u32 s4, 1, 0x100000
; GFX10-NEXT: s_andn2_b32 s2, 15, s2
-; GFX10-NEXT: s_lshl_b32 s0, s0, s4
-; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX10-NEXT: s_lshl_b32 s0, s0, 1
+; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
; GFX10-NEXT: s_lshl_b32 s0, s0, s2
; GFX10-NEXT: s_lshr_b32 s1, s1, s3
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX11-LABEL: s_fshr_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s3, s2, 15
-; GFX11-NEXT: s_bfe_u32 s4, 1, 0x100000
; GFX11-NEXT: s_and_not1_b32 s2, 15, s2
-; GFX11-NEXT: s_lshl_b32 s0, s0, s4
-; GFX11-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX11-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX11-NEXT: s_lshl_b32 s0, s0, 1
+; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX11-NEXT: s_and_b32 s3, 0xffff, s3
; GFX11-NEXT: s_lshl_b32 s0, s0, s2
; GFX11-NEXT: s_lshr_b32 s1, s1, s3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
;
; GFX8-LABEL: s_fshr_i16_4:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s2, 12, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s2, 4, 0x100000
-; GFX8-NEXT: s_lshr_b32 s1, s1, s2
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_lshl_b32 s0, s0, 12
+; GFX8-NEXT: s_lshr_b32 s1, s1, 4
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_fshr_i16_4:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s2, 12, 0x100000
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX9-NEXT: s_bfe_u32 s2, 4, 0x100000
-; GFX9-NEXT: s_lshr_b32 s1, s1, s2
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX9-NEXT: s_lshl_b32 s0, s0, 12
+; GFX9-NEXT: s_lshr_b32 s1, s1, 4
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_fshr_i16_4:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_bfe_u32 s2, 12, 0x100000
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX10-NEXT: s_bfe_u32 s3, 4, 0x100000
-; GFX10-NEXT: s_lshl_b32 s0, s0, s2
-; GFX10-NEXT: s_lshr_b32 s1, s1, s3
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX10-NEXT: s_lshl_b32 s0, s0, 12
+; GFX10-NEXT: s_lshr_b32 s1, s1, 4
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_fshr_i16_4:
; GFX11: ; %bb.0:
-; GFX11-NEXT: s_bfe_u32 s2, 12, 0x100000
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX11-NEXT: s_bfe_u32 s3, 4, 0x100000
-; GFX11-NEXT: s_lshl_b32 s0, s0, s2
-; GFX11-NEXT: s_lshr_b32 s1, s1, s3
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX11-NEXT: s_lshl_b32 s0, s0, 12
+; GFX11-NEXT: s_lshr_b32 s1, s1, 4
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX11-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshr_i16_5:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s2, 11, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s2, 5, 0x100000
-; GFX8-NEXT: s_lshr_b32 s1, s1, s2
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_lshl_b32 s0, s0, 11
+; GFX8-NEXT: s_lshr_b32 s1, s1, 5
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_fshr_i16_5:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s2, 11, 0x100000
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX9-NEXT: s_bfe_u32 s2, 5, 0x100000
-; GFX9-NEXT: s_lshr_b32 s1, s1, s2
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX9-NEXT: s_lshl_b32 s0, s0, 11
+; GFX9-NEXT: s_lshr_b32 s1, s1, 5
; GFX9-NEXT: s_or_b32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_fshr_i16_5:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_bfe_u32 s2, 11, 0x100000
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX10-NEXT: s_bfe_u32 s3, 5, 0x100000
-; GFX10-NEXT: s_lshl_b32 s0, s0, s2
-; GFX10-NEXT: s_lshr_b32 s1, s1, s3
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX10-NEXT: s_lshl_b32 s0, s0, 11
+; GFX10-NEXT: s_lshr_b32 s1, s1, 5
; GFX10-NEXT: s_or_b32 s0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_fshr_i16_5:
; GFX11: ; %bb.0:
-; GFX11-NEXT: s_bfe_u32 s2, 11, 0x100000
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX11-NEXT: s_bfe_u32 s3, 5, 0x100000
-; GFX11-NEXT: s_lshl_b32 s0, s0, s2
-; GFX11-NEXT: s_lshr_b32 s1, s1, s3
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX11-NEXT: s_lshl_b32 s0, s0, 11
+; GFX11-NEXT: s_lshr_b32 s1, s1, 5
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_or_b32 s0, s0, s1
; GFX11-NEXT: ; return to shader part epilog
; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
; GFX6-NEXT: v_and_b32_e32 v2, 15, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
-; GFX6-NEXT: v_bfe_u32 v2, v3, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
; GFX6-NEXT: s_lshl_b32 s0, s0, 1
-; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
-; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: s_and_b32 s0, s1, 0xffff
; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8: ; %bb.0:
; GFX8-NEXT: v_and_b32_e32 v1, 15, v0
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX8-NEXT: s_bfe_u32 s2, 1, 0x100000
; GFX8-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
+; GFX8-NEXT: s_lshl_b32 s0, s0, 1
; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0
; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v1, 15, v0
; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX9-NEXT: s_bfe_u32 s2, 1, 0x100000
; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
+; GFX9-NEXT: s_lshl_b32 s0, s0, 1
; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0
; GFX9-NEXT: v_lshrrev_b16_e64 v1, v1, s1
; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
; GFX10-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000
-; GFX10-NEXT: s_lshl_b32 s0, s0, s2
+; GFX10-NEXT: s_lshl_b32 s0, s0, 1
; GFX10-NEXT: v_and_b32_e32 v1, 15, v1
; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1
; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0
; GFX11: ; %bb.0:
; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0
; GFX11-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX11-NEXT: s_bfe_u32 s2, 1, 0x100000
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: s_lshl_b32 s0, s0, s2
-; GFX11-NEXT: v_and_b32_e32 v1, 15, v1
+; GFX11-NEXT: s_lshl_b32 s0, s0, 1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_and_b32_e32 v1, 15, v1
; GFX11-NEXT: v_lshrrev_b16 v0, v0, s1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b16 v1, v1, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
; GFX11-NEXT: ; return to shader part epilog
%result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
; GFX6-NEXT: s_and_b32 s2, s1, 15
; GFX6-NEXT: s_andn2_b32 s1, 15, s1
; GFX6-NEXT: s_lshl_b32 s0, s0, 1
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
; GFX6-NEXT: s_lshl_b32 s0, s0, s1
-; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s2, s1, 15
; GFX8-NEXT: s_andn2_b32 s1, 15, s1
-; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s3
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_lshl_b32 s0, s0, 1
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s0, s0, s1
; GFX8-NEXT: v_lshrrev_b16_e32 v0, s2, v0
; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s2, s1, 15
; GFX9-NEXT: s_andn2_b32 s1, 15, s1
-; GFX9-NEXT: s_bfe_u32 s3, 1, 0x100000
-; GFX9-NEXT: s_lshl_b32 s0, s0, s3
-; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX9-NEXT: s_lshl_b32 s0, s0, 1
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
; GFX9-NEXT: s_lshl_b32 s0, s0, s1
; GFX9-NEXT: v_lshrrev_b16_e32 v0, s2, v0
; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
; GFX10-LABEL: v_fshr_i16_svs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_and_b32 s2, s1, 15
-; GFX10-NEXT: s_bfe_u32 s3, 1, 0x100000
; GFX10-NEXT: s_andn2_b32 s1, 15, s1
; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0
-; GFX10-NEXT: s_lshl_b32 s0, s0, s3
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_lshl_b32 s0, s0, 1
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: s_lshl_b32 s0, s0, s1
; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
; GFX10-NEXT: ; return to shader part epilog
; GFX11-LABEL: v_fshr_i16_svs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s2, s1, 15
-; GFX11-NEXT: s_bfe_u32 s3, 1, 0x100000
; GFX11-NEXT: s_and_not1_b32 s1, 15, s1
; GFX11-NEXT: v_lshrrev_b16 v0, s2, v0
-; GFX11-NEXT: s_lshl_b32 s0, s0, s3
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_lshl_b32 s0, s0, 1
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_lshl_b32 s0, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX6-NEXT: s_and_b32 s2, s1, 15
; GFX6-NEXT: s_andn2_b32 s1, 15, s1
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
; GFX6-NEXT: v_lshlrev_b32_e32 v0, s1, v0
-; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
; GFX6-NEXT: s_lshr_b32 s0, s0, s1
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
; GFX8-NEXT: s_andn2_b32 s1, 15, s1
; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
; GFX8-NEXT: v_lshlrev_b16_e32 v0, s1, v0
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX8-NEXT: s_bfe_u32 s1, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
; GFX8-NEXT: s_lshr_b32 s0, s0, s1
; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
; GFX8-NEXT: ; return to shader part epilog
; GFX9-NEXT: s_andn2_b32 s1, 15, s1
; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
; GFX9-NEXT: v_lshlrev_b16_e32 v0, s1, v0
-; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX9-NEXT: s_bfe_u32 s1, s2, 0x100000
+; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s2
; GFX9-NEXT: s_lshr_b32 s0, s0, s1
; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
; GFX9-NEXT: ; return to shader part epilog
; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
; GFX10-NEXT: s_andn2_b32 s2, 15, s1
; GFX10-NEXT: s_and_b32 s1, s1, 15
-; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0
; GFX10-NEXT: s_lshr_b32 s0, s0, s1
; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
; GFX11-NEXT: s_and_not1_b32 s2, 15, s1
; GFX11-NEXT: s_and_b32 s1, s1, 15
-; GFX11-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
; GFX11-NEXT: v_lshlrev_b16 v0, s2, v0
; GFX11-NEXT: s_lshr_b32 s0, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX6-NEXT: s_lshl_b32 s5, s5, 16
; GFX6-NEXT: s_and_b32 s4, s4, 0xffff
; GFX6-NEXT: s_or_b32 s4, s5, s4
-; GFX6-NEXT: s_bfe_u32 s5, 1, 0x100000
-; GFX6-NEXT: s_lshl_b32 s0, s0, s5
-; GFX6-NEXT: s_bfe_u32 s6, s2, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s7, 14, 0x100000
-; GFX6-NEXT: s_lshl_b32 s1, s1, s5
+; GFX6-NEXT: s_bfe_u32 s5, s2, 0xf0001
+; GFX6-NEXT: s_lshl_b32 s0, s0, 1
+; GFX6-NEXT: s_lshr_b32 s5, s5, 14
+; GFX6-NEXT: s_or_b32 s0, s0, s5
; GFX6-NEXT: s_bfe_u32 s5, s3, 0xf0001
-; GFX6-NEXT: s_lshr_b32 s6, s6, s7
-; GFX6-NEXT: s_lshr_b32 s5, s5, s7
+; GFX6-NEXT: s_lshl_b32 s1, s1, 1
+; GFX6-NEXT: s_lshr_b32 s5, s5, 14
; GFX6-NEXT: s_xor_b32 s4, s4, -1
-; GFX6-NEXT: s_or_b32 s0, s0, s6
; GFX6-NEXT: s_or_b32 s1, s1, s5
; GFX6-NEXT: s_lshl_b32 s2, s2, 1
; GFX6-NEXT: s_lshr_b32 s5, s4, 16
; GFX6-NEXT: s_and_b32 s6, s4, 15
; GFX6-NEXT: s_andn2_b32 s4, 15, s4
-; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
+; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
; GFX6-NEXT: s_bfe_u32 s2, s2, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_lshl_b32 s0, s0, s6
; GFX6-NEXT: s_lshr_b32 s2, s2, s4
; GFX6-NEXT: s_or_b32 s0, s0, s2
; GFX6-NEXT: s_and_b32 s2, s5, 15
; GFX6-NEXT: s_lshl_b32 s3, s3, 1
; GFX6-NEXT: s_andn2_b32 s4, 15, s5
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: s_lshl_b32 s1, s1, s2
; GFX6-NEXT: s_bfe_u32 s2, s3, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s3, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s3, 0xffff, s4
; GFX6-NEXT: s_lshr_b32 s2, s2, s3
; GFX6-NEXT: s_or_b32 s1, s1, s2
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshr_v2i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s5, 1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s6, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s7, 15, 0x100000
+; GFX8-NEXT: s_and_b32 s5, 0xffff, s1
; GFX8-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s1, 16
-; GFX8-NEXT: s_lshl_b32 s0, s0, s5
-; GFX8-NEXT: s_lshr_b32 s6, s6, s7
-; GFX8-NEXT: s_or_b32 s0, s0, s6
-; GFX8-NEXT: s_lshl_b32 s3, s3, s5
-; GFX8-NEXT: s_lshr_b32 s6, s4, s7
-; GFX8-NEXT: s_lshl_b32 s1, s1, s5
+; GFX8-NEXT: s_lshl_b32 s0, s0, 1
+; GFX8-NEXT: s_lshr_b32 s5, s5, 15
+; GFX8-NEXT: s_or_b32 s0, s0, s5
+; GFX8-NEXT: s_lshl_b32 s3, s3, 1
+; GFX8-NEXT: s_lshr_b32 s5, s4, 15
+; GFX8-NEXT: s_lshl_b32 s1, s1, 1
; GFX8-NEXT: s_xor_b32 s2, s2, -1
-; GFX8-NEXT: s_or_b32 s3, s3, s6
-; GFX8-NEXT: s_lshr_b32 s6, s2, 16
-; GFX8-NEXT: s_and_b32 s7, s2, 15
+; GFX8-NEXT: s_or_b32 s3, s3, s5
+; GFX8-NEXT: s_lshr_b32 s5, s2, 16
+; GFX8-NEXT: s_and_b32 s6, s2, 15
; GFX8-NEXT: s_andn2_b32 s2, 15, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000
-; GFX8-NEXT: s_lshr_b32 s1, s1, s5
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s7
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
+; GFX8-NEXT: s_lshr_b32 s1, s1, 1
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT: s_lshl_b32 s0, s0, s6
; GFX8-NEXT: s_lshr_b32 s1, s1, s2
; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s6, 15
-; GFX8-NEXT: s_lshl_b32 s4, s4, s5
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_andn2_b32 s2, 15, s6
+; GFX8-NEXT: s_and_b32 s1, s5, 15
+; GFX8-NEXT: s_lshl_b32 s4, s4, 1
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_andn2_b32 s2, 15, s5
; GFX8-NEXT: s_lshl_b32 s1, s3, s1
-; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000
-; GFX8-NEXT: s_lshr_b32 s3, s3, s5
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s4
+; GFX8-NEXT: s_lshr_b32 s3, s3, 1
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshr_b32 s2, s3, s2
; GFX8-NEXT: s_or_b32 s1, s1, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_or_b32_e32 v4, v5, v4
-; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000
; GFX6-NEXT: v_bfe_u32 v5, v2, 1, 15
-; GFX6-NEXT: s_bfe_u32 s5, 14, 0x100000
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0
-; GFX6-NEXT: v_lshrrev_b32_e32 v5, s5, v5
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v5, 14, v5
; GFX6-NEXT: v_or_b32_e32 v0, v0, v5
; GFX6-NEXT: v_bfe_u32 v5, v3, 1, 15
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1
-; GFX6-NEXT: v_lshrrev_b32_e32 v5, s5, v5
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v5, 14, v5
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
; GFX6-NEXT: v_or_b32_e32 v1, v1, v5
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
-; GFX6-NEXT: v_bfe_u32 v6, v6, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
-; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
-; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
-; GFX6-NEXT: v_bfe_u32 v3, v4, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0
; GFX8-NEXT: v_lshrrev_b16_e32 v1, v4, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
-; GFX8-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-LABEL: v_fshr_v2i16_4_8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: s_bfe_u32 s4, 12, 0x100000
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0
; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
-; GFX6-NEXT: s_bfe_u32 s4, 3, 0x100000
-; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2
-; GFX6-NEXT: s_bfe_u32 s4, 8, 0x100000
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 12, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 3, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1
; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
-; GFX6-NEXT: s_bfe_u32 s4, 7, 0x100000
-; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 7, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2
; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
-; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000
-; GFX6-NEXT: s_bfe_u32 s5, s2, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s6, 14, 0x100000
+; GFX6-NEXT: s_bfe_u32 s4, s2, 0xf0001
; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX6-NEXT: s_lshl_b32 s0, s0, s4
-; GFX6-NEXT: s_lshr_b32 s5, s5, s6
+; GFX6-NEXT: s_lshl_b32 s0, s0, 1
+; GFX6-NEXT: s_lshr_b32 s4, s4, 14
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT: v_and_b32_e32 v2, 15, v0
; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX6-NEXT: s_or_b32 s0, s0, s5
+; GFX6-NEXT: s_or_b32 s0, s0, s4
; GFX6-NEXT: s_lshl_b32 s2, s2, 1
; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001
-; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
-; GFX6-NEXT: s_lshl_b32 s1, s1, s4
; GFX6-NEXT: s_bfe_u32 s4, s3, 0xf0001
; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
; GFX6-NEXT: v_and_b32_e32 v2, 15, v1
; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
-; GFX6-NEXT: s_lshr_b32 s4, s4, s6
+; GFX6-NEXT: s_lshl_b32 s1, s1, 1
+; GFX6-NEXT: s_lshr_b32 s4, s4, 14
; GFX6-NEXT: s_lshl_b32 s3, s3, 1
; GFX6-NEXT: v_and_b32_e32 v1, 15, v1
; GFX6-NEXT: s_or_b32 s1, s1, s4
-; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001
-; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2
; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
-; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
-; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fshr_v2i16_ssv:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s5, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s6, 15, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s1
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
-; GFX8-NEXT: s_lshl_b32 s0, s0, s4
-; GFX8-NEXT: s_lshr_b32 s5, s5, s6
+; GFX8-NEXT: s_lshl_b32 s0, s0, 1
+; GFX8-NEXT: s_lshr_b32 s4, s4, 15
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
-; GFX8-NEXT: s_or_b32 s0, s0, s5
-; GFX8-NEXT: s_lshl_b32 s1, s1, s4
+; GFX8-NEXT: s_or_b32 s0, s0, s4
+; GFX8-NEXT: s_lshl_b32 s1, s1, 1
; GFX8-NEXT: v_and_b32_e32 v2, 15, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s0
-; GFX8-NEXT: s_bfe_u32 s0, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s1
; GFX8-NEXT: v_and_b32_e32 v0, 15, v0
-; GFX8-NEXT: s_lshr_b32 s0, s0, s4
-; GFX8-NEXT: s_lshr_b32 s5, s3, s6
-; GFX8-NEXT: s_lshl_b32 s3, s3, s4
+; GFX8-NEXT: s_lshr_b32 s0, s0, 1
+; GFX8-NEXT: s_lshr_b32 s4, s3, 15
+; GFX8-NEXT: s_lshl_b32 s3, s3, 1
; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0
-; GFX8-NEXT: s_lshl_b32 s2, s2, s4
+; GFX8-NEXT: s_lshl_b32 s2, s2, 1
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: v_and_b32_e32 v2, 15, v1
; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
-; GFX8-NEXT: s_bfe_u32 s0, s3, 0x100000
-; GFX8-NEXT: s_or_b32 s2, s2, s5
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s3
+; GFX8-NEXT: s_or_b32 s2, s2, s4
; GFX8-NEXT: v_and_b32_e32 v1, 15, v1
-; GFX8-NEXT: s_lshr_b32 s0, s0, s4
+; GFX8-NEXT: s_lshr_b32 s0, s0, 1
; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2
; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s0
; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: ; return to shader part epilog
;
define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) {
; GFX6-LABEL: v_fshr_v2i16_svs:
; GFX6: ; %bb.0:
+; GFX6-NEXT: v_bfe_u32 v2, v0, 1, 15
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
-; GFX6-NEXT: s_or_b32 s2, s3, s2
-; GFX6-NEXT: s_bfe_u32 s3, 1, 0x100000
-; GFX6-NEXT: v_bfe_u32 v2, v0, 1, 15
-; GFX6-NEXT: s_bfe_u32 s4, 14, 0x100000
-; GFX6-NEXT: s_lshl_b32 s0, s0, s3
-; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2
+; GFX6-NEXT: s_lshl_b32 s0, s0, 1
+; GFX6-NEXT: v_lshrrev_b32_e32 v2, 14, v2
; GFX6-NEXT: v_bfe_u32 v3, v1, 1, 15
+; GFX6-NEXT: s_or_b32 s2, s3, s2
; GFX6-NEXT: v_or_b32_e32 v2, s0, v2
-; GFX6-NEXT: s_lshl_b32 s0, s1, s3
-; GFX6-NEXT: v_lshrrev_b32_e32 v3, s4, v3
+; GFX6-NEXT: s_lshl_b32 s0, s1, 1
+; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v3
; GFX6-NEXT: v_or_b32_e32 v3, s0, v3
; GFX6-NEXT: s_xor_b32 s0, s2, -1
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX6-NEXT: s_and_b32 s2, s0, 15
; GFX6-NEXT: s_andn2_b32 s0, 15, s0
; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: v_lshrrev_b32_e32 v0, s0, v0
; GFX6-NEXT: s_and_b32 s0, s1, 15
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v2, s2, v2
; GFX6-NEXT: s_andn2_b32 s1, 15, s1
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v2, s0, v3
; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15
-; GFX6-NEXT: s_bfe_u32 s0, s1, 0x100000
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, s0, v1
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
-; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
-; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fshr_v2i16_svs:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
-; GFX8-NEXT: s_lshl_b32 s0, s0, s3
+; GFX8-NEXT: s_lshl_b32 s0, s0, 1
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 15, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 15
; GFX8-NEXT: v_or_b32_e32 v1, s0, v1
-; GFX8-NEXT: s_lshl_b32 s0, s2, s3
+; GFX8-NEXT: s_lshl_b32 s0, s2, 1
; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v2, s0, v2
; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0
; GFX8-NEXT: v_lshrrev_b16_e32 v0, 1, v0
; GFX8-NEXT: v_lshlrev_b16_e32 v2, s0, v2
; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0
-; GFX8-NEXT: v_lshlrev_b16_e32 v1, s2, v1
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-NEXT: v_lshlrev_b16_e32 v1, s2, v1
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: ; return to shader part epilog
;
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
; GFX6-NEXT: s_or_b32 s2, s3, s2
-; GFX6-NEXT: s_bfe_u32 s3, 1, 0x100000
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, s3, v0
-; GFX6-NEXT: s_bfe_u32 s4, s0, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s5, 14, 0x100000
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, s3, v1
+; GFX6-NEXT: s_bfe_u32 s3, s0, 0xf0001
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX6-NEXT: s_lshr_b32 s3, s3, 14
+; GFX6-NEXT: v_or_b32_e32 v0, s3, v0
; GFX6-NEXT: s_bfe_u32 s3, s1, 0xf0001
-; GFX6-NEXT: s_lshr_b32 s4, s4, s5
-; GFX6-NEXT: s_lshr_b32 s3, s3, s5
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-NEXT: s_lshr_b32 s3, s3, 14
; GFX6-NEXT: s_xor_b32 s2, s2, -1
-; GFX6-NEXT: v_or_b32_e32 v0, s4, v0
; GFX6-NEXT: v_or_b32_e32 v1, s3, v1
; GFX6-NEXT: s_lshl_b32 s0, s0, 1
; GFX6-NEXT: s_lshr_b32 s3, s2, 16
; GFX6-NEXT: s_and_b32 s4, s2, 15
; GFX6-NEXT: s_andn2_b32 s2, 15, s2
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0
; GFX6-NEXT: s_lshr_b32 s0, s0, s2
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
; GFX6-NEXT: s_and_b32 s0, s3, 15
; GFX6-NEXT: s_lshl_b32 s1, s1, 1
; GFX6-NEXT: s_andn2_b32 s2, 15, s3
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1
; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
; GFX6-NEXT: s_lshr_b32 s0, s0, s1
; GFX6-NEXT: v_or_b32_e32 v1, s0, v1
-; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
-; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fshr_v2i16_vss:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s3, s0, 0x100000
-; GFX8-NEXT: s_bfe_u32 s4, 15, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s0
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v0
-; GFX8-NEXT: s_lshr_b32 s3, s3, s4
+; GFX8-NEXT: s_lshr_b32 s3, s3, 15
; GFX8-NEXT: v_mov_b32_e32 v2, 1
; GFX8-NEXT: v_or_b32_e32 v1, s3, v1
; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: s_lshr_b32 s3, s2, s4
-; GFX8-NEXT: v_or_b32_e32 v0, s3, v0
-; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s3
+; GFX8-NEXT: s_lshr_b32 s3, s2, 15
+; GFX8-NEXT: s_lshl_b32 s0, s0, 1
; GFX8-NEXT: s_xor_b32 s1, s1, -1
-; GFX8-NEXT: s_lshr_b32 s4, s1, 16
-; GFX8-NEXT: s_and_b32 s5, s1, 15
+; GFX8-NEXT: v_or_b32_e32 v0, s3, v0
+; GFX8-NEXT: s_lshr_b32 s3, s1, 16
+; GFX8-NEXT: s_and_b32 s4, s1, 15
; GFX8-NEXT: s_andn2_b32 s1, 15, s1
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX8-NEXT: s_lshr_b32 s0, s0, s3
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: v_lshlrev_b16_e32 v1, s5, v1
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
+; GFX8-NEXT: s_lshr_b32 s0, s0, 1
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: v_lshlrev_b16_e32 v1, s4, v1
; GFX8-NEXT: s_lshr_b32 s0, s0, s1
-; GFX8-NEXT: s_lshl_b32 s2, s2, s3
+; GFX8-NEXT: s_lshl_b32 s2, s2, 1
; GFX8-NEXT: v_or_b32_e32 v1, s0, v1
-; GFX8-NEXT: s_and_b32 s0, s4, 15
-; GFX8-NEXT: s_andn2_b32 s1, 15, s4
+; GFX8-NEXT: s_and_b32 s0, s3, 15
+; GFX8-NEXT: s_andn2_b32 s1, 15, s3
; GFX8-NEXT: v_lshlrev_b16_e32 v0, s0, v0
-; GFX8-NEXT: s_bfe_u32 s0, s2, 0x100000
-; GFX8-NEXT: s_lshr_b32 s0, s0, s3
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s2
+; GFX8-NEXT: s_lshr_b32 s0, s0, 1
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshr_b32 s0, s0, s1
; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: ; return to shader part epilog
;
; GFX6-NEXT: s_lshl_b32 s7, s7, 16
; GFX6-NEXT: s_or_b32 s6, s6, s7
; GFX6-NEXT: s_and_b32 s7, s8, 0xffff
-; GFX6-NEXT: s_bfe_u32 s8, 1, 0x100000
-; GFX6-NEXT: s_bfe_u32 s9, s3, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s10, 14, 0x100000
-; GFX6-NEXT: s_lshl_b32 s0, s0, s8
-; GFX6-NEXT: s_lshr_b32 s9, s9, s10
-; GFX6-NEXT: s_or_b32 s0, s0, s9
-; GFX6-NEXT: s_bfe_u32 s9, s4, 0xf0001
-; GFX6-NEXT: s_lshl_b32 s1, s1, s8
-; GFX6-NEXT: s_lshr_b32 s9, s9, s10
+; GFX6-NEXT: s_bfe_u32 s8, s3, 0xf0001
+; GFX6-NEXT: s_lshl_b32 s0, s0, 1
+; GFX6-NEXT: s_lshr_b32 s8, s8, 14
+; GFX6-NEXT: s_or_b32 s0, s0, s8
+; GFX6-NEXT: s_bfe_u32 s8, s4, 0xf0001
+; GFX6-NEXT: s_lshl_b32 s1, s1, 1
+; GFX6-NEXT: s_lshr_b32 s8, s8, 14
; GFX6-NEXT: s_xor_b32 s6, s6, -1
-; GFX6-NEXT: s_or_b32 s1, s1, s9
+; GFX6-NEXT: s_or_b32 s1, s1, s8
; GFX6-NEXT: s_lshl_b32 s3, s3, 1
-; GFX6-NEXT: s_lshr_b32 s9, s6, 16
-; GFX6-NEXT: s_and_b32 s11, s6, 15
+; GFX6-NEXT: s_lshr_b32 s8, s6, 16
+; GFX6-NEXT: s_and_b32 s9, s6, 15
; GFX6-NEXT: s_andn2_b32 s6, 15, s6
-; GFX6-NEXT: s_bfe_u32 s11, s11, 0x100000
+; GFX6-NEXT: s_and_b32 s9, 0xffff, s9
; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
-; GFX6-NEXT: s_lshl_b32 s0, s0, s11
+; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
+; GFX6-NEXT: s_lshl_b32 s0, s0, s9
; GFX6-NEXT: s_lshr_b32 s3, s3, s6
; GFX6-NEXT: s_or_b32 s0, s0, s3
-; GFX6-NEXT: s_and_b32 s3, s9, 15
+; GFX6-NEXT: s_and_b32 s3, s8, 15
; GFX6-NEXT: s_lshl_b32 s4, s4, 1
-; GFX6-NEXT: s_andn2_b32 s6, 15, s9
-; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX6-NEXT: s_andn2_b32 s6, 15, s8
+; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
; GFX6-NEXT: s_lshl_b32 s1, s1, s3
; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s4, s6, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s6
; GFX6-NEXT: s_lshr_b32 s3, s3, s4
; GFX6-NEXT: s_or_b32 s1, s1, s3
; GFX6-NEXT: s_bfe_u32 s3, s5, 0xf0001
-; GFX6-NEXT: s_lshl_b32 s2, s2, s8
-; GFX6-NEXT: s_lshr_b32 s3, s3, s10
+; GFX6-NEXT: s_lshl_b32 s2, s2, 1
+; GFX6-NEXT: s_lshr_b32 s3, s3, 14
; GFX6-NEXT: s_xor_b32 s4, s7, -1
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s3, s5, 1
; GFX6-NEXT: s_and_b32 s5, s4, 15
; GFX6-NEXT: s_andn2_b32 s4, 15, s4
-; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_lshl_b32 s2, s2, s5
; GFX6-NEXT: s_lshr_b32 s3, s3, s4
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
; GFX6-NEXT: s_or_b32 s2, s2, s3
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
-; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshr_v3i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s8, 1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s9, s2, 0x100000
-; GFX8-NEXT: s_bfe_u32 s10, 15, 0x100000
+; GFX8-NEXT: s_and_b32 s8, 0xffff, s2
; GFX8-NEXT: s_lshr_b32 s6, s0, 16
; GFX8-NEXT: s_lshr_b32 s7, s2, 16
-; GFX8-NEXT: s_lshl_b32 s0, s0, s8
-; GFX8-NEXT: s_lshr_b32 s9, s9, s10
-; GFX8-NEXT: s_or_b32 s0, s0, s9
-; GFX8-NEXT: s_lshl_b32 s6, s6, s8
-; GFX8-NEXT: s_lshr_b32 s9, s7, s10
-; GFX8-NEXT: s_lshl_b32 s2, s2, s8
+; GFX8-NEXT: s_lshl_b32 s0, s0, 1
+; GFX8-NEXT: s_lshr_b32 s8, s8, 15
+; GFX8-NEXT: s_or_b32 s0, s0, s8
+; GFX8-NEXT: s_lshl_b32 s6, s6, 1
+; GFX8-NEXT: s_lshr_b32 s8, s7, 15
+; GFX8-NEXT: s_lshl_b32 s2, s2, 1
; GFX8-NEXT: s_xor_b32 s4, s4, -1
-; GFX8-NEXT: s_or_b32 s6, s6, s9
-; GFX8-NEXT: s_lshr_b32 s9, s4, 16
-; GFX8-NEXT: s_and_b32 s11, s4, 15
+; GFX8-NEXT: s_or_b32 s6, s6, s8
+; GFX8-NEXT: s_lshr_b32 s8, s4, 16
+; GFX8-NEXT: s_and_b32 s9, s4, 15
; GFX8-NEXT: s_andn2_b32 s4, 15, s4
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX8-NEXT: s_bfe_u32 s11, s11, 0x100000
-; GFX8-NEXT: s_lshr_b32 s2, s2, s8
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s11
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT: s_and_b32 s9, 0xffff, s9
+; GFX8-NEXT: s_lshr_b32 s2, s2, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX8-NEXT: s_lshl_b32 s0, s0, s9
; GFX8-NEXT: s_lshr_b32 s2, s2, s4
; GFX8-NEXT: s_or_b32 s0, s0, s2
-; GFX8-NEXT: s_and_b32 s2, s9, 15
-; GFX8-NEXT: s_lshl_b32 s7, s7, s8
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX8-NEXT: s_andn2_b32 s4, 15, s9
+; GFX8-NEXT: s_and_b32 s2, s8, 15
+; GFX8-NEXT: s_lshl_b32 s7, s7, 1
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT: s_andn2_b32 s4, 15, s8
; GFX8-NEXT: s_lshl_b32 s2, s6, s2
-; GFX8-NEXT: s_bfe_u32 s6, s7, 0x100000
-; GFX8-NEXT: s_lshr_b32 s6, s6, s8
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_and_b32 s6, 0xffff, s7
+; GFX8-NEXT: s_lshr_b32 s6, s6, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshr_b32 s4, s6, s4
; GFX8-NEXT: s_or_b32 s2, s2, s4
-; GFX8-NEXT: s_bfe_u32 s4, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s3
; GFX8-NEXT: s_and_b32 s5, s5, 0xffff
-; GFX8-NEXT: s_lshl_b32 s1, s1, s8
-; GFX8-NEXT: s_lshr_b32 s4, s4, s10
+; GFX8-NEXT: s_lshl_b32 s1, s1, 1
+; GFX8-NEXT: s_lshr_b32 s4, s4, 15
; GFX8-NEXT: s_or_b32 s1, s1, s4
-; GFX8-NEXT: s_lshl_b32 s3, s3, s8
+; GFX8-NEXT: s_lshl_b32 s3, s3, 1
; GFX8-NEXT: s_xor_b32 s4, s5, -1
; GFX8-NEXT: s_and_b32 s5, s4, 15
; GFX8-NEXT: s_andn2_b32 s4, 15, s4
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
-; GFX8-NEXT: s_bfe_u32 s5, s5, 0x100000
-; GFX8-NEXT: s_lshr_b32 s3, s3, s8
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
+; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
+; GFX8-NEXT: s_lshr_b32 s3, s3, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshl_b32 s1, s1, s5
; GFX8-NEXT: s_lshr_b32 s3, s3, s4
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_or_b32 s1, s1, s3
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_fshr_v3i16:
; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_or_b32_e32 v6, v6, v7
; GFX6-NEXT: v_and_b32_e32 v7, 0xffff, v8
-; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000
; GFX6-NEXT: v_bfe_u32 v8, v3, 1, 15
-; GFX6-NEXT: s_bfe_u32 s5, 14, 0x100000
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0
-; GFX6-NEXT: v_lshrrev_b32_e32 v8, s5, v8
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v8, 14, v8
; GFX6-NEXT: v_or_b32_e32 v0, v0, v8
; GFX6-NEXT: v_bfe_u32 v8, v4, 1, 15
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1
-; GFX6-NEXT: v_lshrrev_b32_e32 v8, s5, v8
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v8, 14, v8
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
; GFX6-NEXT: v_or_b32_e32 v1, v1, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v6
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
-; GFX6-NEXT: v_bfe_u32 v9, v9, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
-; GFX6-NEXT: v_bfe_u32 v6, v6, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
-; GFX6-NEXT: v_bfe_u32 v3, v3, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v1
; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15
-; GFX6-NEXT: v_bfe_u32 v4, v6, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
; GFX6-NEXT: v_bfe_u32 v3, v5, 1, 15
-; GFX6-NEXT: v_lshlrev_b32_e32 v2, s4, v2
-; GFX6-NEXT: v_lshrrev_b32_e32 v3, s5, v3
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v3
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v7
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v5
; GFX6-NEXT: v_and_b32_e32 v5, 15, v4
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
-; GFX6-NEXT: v_bfe_u32 v5, v5, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
-; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v2, v5, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3
; GFX8-NEXT: v_lshlrev_b16_e32 v1, v5, v1
; GFX8-NEXT: v_lshrrev_b16_e32 v3, v4, v3
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_v3i16:
; GFX6-NEXT: s_lshl_b32 s9, s11, 16
; GFX6-NEXT: s_and_b32 s10, s10, 0xffff
; GFX6-NEXT: s_or_b32 s9, s9, s10
-; GFX6-NEXT: s_bfe_u32 s10, 1, 0x100000
-; GFX6-NEXT: s_bfe_u32 s11, s4, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s12, 14, 0x100000
-; GFX6-NEXT: s_lshl_b32 s0, s0, s10
-; GFX6-NEXT: s_lshr_b32 s11, s11, s12
-; GFX6-NEXT: s_or_b32 s0, s0, s11
-; GFX6-NEXT: s_bfe_u32 s11, s5, 0xf0001
-; GFX6-NEXT: s_lshl_b32 s1, s1, s10
-; GFX6-NEXT: s_lshr_b32 s11, s11, s12
+; GFX6-NEXT: s_bfe_u32 s10, s4, 0xf0001
+; GFX6-NEXT: s_lshl_b32 s0, s0, 1
+; GFX6-NEXT: s_lshr_b32 s10, s10, 14
+; GFX6-NEXT: s_or_b32 s0, s0, s10
+; GFX6-NEXT: s_bfe_u32 s10, s5, 0xf0001
+; GFX6-NEXT: s_lshl_b32 s1, s1, 1
+; GFX6-NEXT: s_lshr_b32 s10, s10, 14
; GFX6-NEXT: s_xor_b32 s8, s8, -1
-; GFX6-NEXT: s_or_b32 s1, s1, s11
+; GFX6-NEXT: s_or_b32 s1, s1, s10
; GFX6-NEXT: s_lshl_b32 s4, s4, 1
-; GFX6-NEXT: s_lshr_b32 s11, s8, 16
-; GFX6-NEXT: s_and_b32 s13, s8, 15
+; GFX6-NEXT: s_lshr_b32 s10, s8, 16
+; GFX6-NEXT: s_and_b32 s11, s8, 15
; GFX6-NEXT: s_andn2_b32 s8, 15, s8
-; GFX6-NEXT: s_bfe_u32 s13, s13, 0x100000
+; GFX6-NEXT: s_and_b32 s11, 0xffff, s11
; GFX6-NEXT: s_bfe_u32 s4, s4, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000
-; GFX6-NEXT: s_lshl_b32 s0, s0, s13
+; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
+; GFX6-NEXT: s_lshl_b32 s0, s0, s11
; GFX6-NEXT: s_lshr_b32 s4, s4, s8
; GFX6-NEXT: s_or_b32 s0, s0, s4
-; GFX6-NEXT: s_and_b32 s4, s11, 15
+; GFX6-NEXT: s_and_b32 s4, s10, 15
; GFX6-NEXT: s_lshl_b32 s5, s5, 1
-; GFX6-NEXT: s_andn2_b32 s8, 15, s11
-; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX6-NEXT: s_andn2_b32 s8, 15, s10
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
; GFX6-NEXT: s_lshl_b32 s1, s1, s4
; GFX6-NEXT: s_bfe_u32 s4, s5, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s5, s8, 0x100000
+; GFX6-NEXT: s_and_b32 s5, 0xffff, s8
; GFX6-NEXT: s_lshr_b32 s4, s4, s5
; GFX6-NEXT: s_or_b32 s1, s1, s4
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
-; GFX6-NEXT: s_lshl_b32 s1, s2, s10
+; GFX6-NEXT: s_lshl_b32 s1, s2, 1
; GFX6-NEXT: s_bfe_u32 s2, s6, 0xf0001
-; GFX6-NEXT: s_lshr_b32 s2, s2, s12
+; GFX6-NEXT: s_lshr_b32 s2, s2, 14
; GFX6-NEXT: s_or_b32 s1, s1, s2
-; GFX6-NEXT: s_lshl_b32 s2, s3, s10
+; GFX6-NEXT: s_lshl_b32 s2, s3, 1
; GFX6-NEXT: s_bfe_u32 s3, s7, 0xf0001
-; GFX6-NEXT: s_lshr_b32 s3, s3, s12
+; GFX6-NEXT: s_lshr_b32 s3, s3, 14
; GFX6-NEXT: s_xor_b32 s5, s9, -1
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s3, s6, 1
; GFX6-NEXT: s_lshr_b32 s6, s5, 16
; GFX6-NEXT: s_and_b32 s7, s5, 15
; GFX6-NEXT: s_andn2_b32 s5, 15, s5
-; GFX6-NEXT: s_bfe_u32 s7, s7, 0x100000
+; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
; GFX6-NEXT: s_lshl_b32 s1, s1, s7
; GFX6-NEXT: s_lshr_b32 s3, s3, s5
; GFX6-NEXT: s_or_b32 s1, s1, s3
; GFX6-NEXT: s_and_b32 s3, s6, 15
; GFX6-NEXT: s_andn2_b32 s5, 15, s6
-; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
; GFX6-NEXT: s_lshl_b32 s2, s2, s3
; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001
-; GFX6-NEXT: s_bfe_u32 s4, s5, 0x100000
+; GFX6-NEXT: s_and_b32 s4, 0xffff, s5
; GFX6-NEXT: s_lshr_b32 s3, s3, s4
; GFX6-NEXT: s_or_b32 s2, s2, s3
-; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_fshr_v4i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s8, 1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s9, s2, 0x100000
-; GFX8-NEXT: s_bfe_u32 s10, 15, 0x100000
+; GFX8-NEXT: s_and_b32 s8, 0xffff, s2
; GFX8-NEXT: s_lshr_b32 s6, s0, 16
; GFX8-NEXT: s_lshr_b32 s7, s2, 16
-; GFX8-NEXT: s_lshl_b32 s0, s0, s8
-; GFX8-NEXT: s_lshr_b32 s9, s9, s10
-; GFX8-NEXT: s_or_b32 s0, s0, s9
-; GFX8-NEXT: s_lshl_b32 s6, s6, s8
-; GFX8-NEXT: s_lshr_b32 s9, s7, s10
-; GFX8-NEXT: s_lshl_b32 s2, s2, s8
+; GFX8-NEXT: s_lshl_b32 s0, s0, 1
+; GFX8-NEXT: s_lshr_b32 s8, s8, 15
+; GFX8-NEXT: s_or_b32 s0, s0, s8
+; GFX8-NEXT: s_lshl_b32 s6, s6, 1
+; GFX8-NEXT: s_lshr_b32 s8, s7, 15
+; GFX8-NEXT: s_lshl_b32 s2, s2, 1
; GFX8-NEXT: s_xor_b32 s4, s4, -1
-; GFX8-NEXT: s_or_b32 s6, s6, s9
-; GFX8-NEXT: s_lshr_b32 s9, s4, 16
-; GFX8-NEXT: s_and_b32 s11, s4, 15
+; GFX8-NEXT: s_or_b32 s6, s6, s8
+; GFX8-NEXT: s_lshr_b32 s8, s4, 16
+; GFX8-NEXT: s_and_b32 s9, s4, 15
; GFX8-NEXT: s_andn2_b32 s4, 15, s4
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX8-NEXT: s_bfe_u32 s11, s11, 0x100000
-; GFX8-NEXT: s_lshr_b32 s2, s2, s8
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s11
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT: s_and_b32 s9, 0xffff, s9
+; GFX8-NEXT: s_lshr_b32 s2, s2, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX8-NEXT: s_lshl_b32 s0, s0, s9
; GFX8-NEXT: s_lshr_b32 s2, s2, s4
; GFX8-NEXT: s_or_b32 s0, s0, s2
-; GFX8-NEXT: s_and_b32 s2, s9, 15
-; GFX8-NEXT: s_lshl_b32 s7, s7, s8
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX8-NEXT: s_andn2_b32 s4, 15, s9
+; GFX8-NEXT: s_and_b32 s2, s8, 15
+; GFX8-NEXT: s_lshl_b32 s7, s7, 1
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT: s_andn2_b32 s4, 15, s8
; GFX8-NEXT: s_lshl_b32 s2, s6, s2
-; GFX8-NEXT: s_bfe_u32 s6, s7, 0x100000
-; GFX8-NEXT: s_lshr_b32 s6, s6, s8
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_and_b32 s6, 0xffff, s7
+; GFX8-NEXT: s_lshr_b32 s6, s6, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_lshr_b32 s4, s6, s4
; GFX8-NEXT: s_or_b32 s2, s2, s4
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
-; GFX8-NEXT: s_bfe_u32 s6, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s6, 0xffff, s3
; GFX8-NEXT: s_or_b32 s0, s0, s2
; GFX8-NEXT: s_lshr_b32 s2, s1, 16
; GFX8-NEXT: s_lshr_b32 s4, s3, 16
-; GFX8-NEXT: s_lshl_b32 s1, s1, s8
-; GFX8-NEXT: s_lshr_b32 s6, s6, s10
+; GFX8-NEXT: s_lshl_b32 s1, s1, 1
+; GFX8-NEXT: s_lshr_b32 s6, s6, 15
; GFX8-NEXT: s_or_b32 s1, s1, s6
-; GFX8-NEXT: s_lshl_b32 s2, s2, s8
-; GFX8-NEXT: s_lshr_b32 s6, s4, s10
-; GFX8-NEXT: s_lshl_b32 s3, s3, s8
+; GFX8-NEXT: s_lshl_b32 s2, s2, 1
+; GFX8-NEXT: s_lshr_b32 s6, s4, 15
+; GFX8-NEXT: s_lshl_b32 s3, s3, 1
; GFX8-NEXT: s_xor_b32 s5, s5, -1
; GFX8-NEXT: s_or_b32 s2, s2, s6
; GFX8-NEXT: s_lshr_b32 s6, s5, 16
; GFX8-NEXT: s_and_b32 s7, s5, 15
; GFX8-NEXT: s_andn2_b32 s5, 15, s5
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
-; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000
-; GFX8-NEXT: s_lshr_b32 s3, s3, s8
-; GFX8-NEXT: s_bfe_u32 s5, s5, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
+; GFX8-NEXT: s_and_b32 s7, 0xffff, s7
+; GFX8-NEXT: s_lshr_b32 s3, s3, 1
+; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
; GFX8-NEXT: s_lshl_b32 s1, s1, s7
; GFX8-NEXT: s_lshr_b32 s3, s3, s5
; GFX8-NEXT: s_or_b32 s1, s1, s3
; GFX8-NEXT: s_and_b32 s3, s6, 15
-; GFX8-NEXT: s_lshl_b32 s4, s4, s8
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_lshl_b32 s4, s4, 1
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_andn2_b32 s5, 15, s6
; GFX8-NEXT: s_lshl_b32 s2, s2, s3
-; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000
-; GFX8-NEXT: s_lshr_b32 s3, s3, s8
-; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s4
+; GFX8-NEXT: s_lshr_b32 s3, s3, 1
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
; GFX8-NEXT: s_lshr_b32 s3, s3, s4
; GFX8-NEXT: s_or_b32 s2, s2, s3
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s1, s1, s2
; GFX8-NEXT: ; return to shader part epilog
; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v11
; GFX6-NEXT: v_and_b32_e32 v10, 0xffff, v10
; GFX6-NEXT: v_or_b32_e32 v9, v9, v10
-; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000
; GFX6-NEXT: v_bfe_u32 v10, v4, 1, 15
-; GFX6-NEXT: s_bfe_u32 s5, 14, 0x100000
-; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0
-; GFX6-NEXT: v_lshrrev_b32_e32 v10, s5, v10
+; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
+; GFX6-NEXT: v_lshrrev_b32_e32 v10, 14, v10
; GFX6-NEXT: v_or_b32_e32 v0, v0, v10
; GFX6-NEXT: v_bfe_u32 v10, v5, 1, 15
-; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1
-; GFX6-NEXT: v_lshrrev_b32_e32 v10, s5, v10
+; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
+; GFX6-NEXT: v_lshrrev_b32_e32 v10, 14, v10
; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
; GFX6-NEXT: v_or_b32_e32 v1, v1, v10
; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v8
; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
-; GFX6-NEXT: v_bfe_u32 v11, v11, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v11, 0xffff, v11
; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
-; GFX6-NEXT: v_bfe_u32 v8, v8, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v11, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5
; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
-; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
-; GFX6-NEXT: v_bfe_u32 v5, v8, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15
-; GFX6-NEXT: v_lshlrev_b32_e32 v2, s4, v2
-; GFX6-NEXT: v_lshrrev_b32_e32 v4, s5, v4
+; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
+; GFX6-NEXT: v_lshrrev_b32_e32 v4, 14, v4
; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15
-; GFX6-NEXT: v_lshlrev_b32_e32 v3, s4, v3
-; GFX6-NEXT: v_lshrrev_b32_e32 v4, s5, v4
+; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
+; GFX6-NEXT: v_lshrrev_b32_e32 v4, 14, v4
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v6
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v9
; GFX6-NEXT: v_and_b32_e32 v8, 15, v6
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
-; GFX6-NEXT: v_bfe_u32 v8, v8, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
-; GFX6-NEXT: v_bfe_u32 v6, v6, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4
; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
; GFX6-NEXT: v_and_b32_e32 v4, 15, v7
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
-; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
-; GFX6-NEXT: v_bfe_u32 v5, v6, 0, 16
+; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX8-NEXT: v_lshlrev_b16_e32 v0, v6, v0
; GFX8-NEXT: v_lshrrev_b16_e32 v2, v7, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b16_e32 v4, 1, v1
-; GFX8-NEXT: v_lshrrev_b16_e32 v6, 15, v3
-; GFX8-NEXT: v_or_b32_e32 v4, v4, v6
-; GFX8-NEXT: v_mov_b32_e32 v6, 1
-; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: v_lshrrev_b16_sdwa v7, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5
-; GFX8-NEXT: v_or_b32_e32 v1, v1, v7
-; GFX8-NEXT: v_lshlrev_b16_e32 v7, 1, v3
-; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v5
-; GFX8-NEXT: v_and_b32_e32 v8, 15, v5
+; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v1
+; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v3
+; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
+; GFX8-NEXT: v_mov_b32_e32 v4, 1
+; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_lshrrev_b16_sdwa v6, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v6
+; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v3
+; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_xor_b32_e32 v4, -1, v5
+; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v4
+; GFX8-NEXT: v_and_b32_e32 v7, 15, v4
+; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX8-NEXT: v_and_b32_e32 v4, 15, v4
+; GFX8-NEXT: v_lshrrev_b16_e32 v6, 1, v6
+; GFX8-NEXT: v_lshlrev_b16_e32 v2, v7, v2
+; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v6
+; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
+; GFX8-NEXT: v_and_b32_e32 v4, 15, v5
; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5
; GFX8-NEXT: v_and_b32_e32 v5, 15, v5
-; GFX8-NEXT: v_lshrrev_b16_e32 v7, 1, v7
-; GFX8-NEXT: v_lshlrev_b16_e32 v4, v8, v4
-; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v7
-; GFX8-NEXT: v_or_b32_e32 v4, v4, v5
-; GFX8-NEXT: v_and_b32_e32 v5, 15, v6
-; GFX8-NEXT: v_xor_b32_e32 v6, -1, v6
-; GFX8-NEXT: v_and_b32_e32 v6, 15, v6
; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3
-; GFX8-NEXT: v_lshlrev_b16_e32 v1, v5, v1
-; GFX8-NEXT: v_lshrrev_b16_e32 v3, v6, v3
+; GFX8-NEXT: v_lshlrev_b16_e32 v1, v4, v1
+; GFX8-NEXT: v_lshrrev_b16_e32 v3, v5, v3
; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fshr_v4i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v5, 63, v4
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX6-NEXT: v_not_b32_e32 v4, v4
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
; GFX6-NEXT: v_and_b32_e32 v4, 63, v4
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v4
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v5, 63, v4
-; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX8-NEXT: v_not_b32_e32 v4, v4
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX8-NEXT: v_and_b32_e32 v4, 63, v4
; GFX8-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v5, 63, v4
-; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4
+; GFX9-NEXT: v_not_b32_e32 v4, v4
; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX9-NEXT: v_and_b32_e32 v4, 63, v4
; GFX9-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_xor_b32_e32 v5, -1, v4
+; GFX10-NEXT: v_not_b32_e32 v5, v4
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX10-NEXT: v_and_b32_e32 v4, 63, v4
; GFX10-NEXT: v_and_b32_e32 v5, 63, v5
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_xor_b32_e32 v5, -1, v4
+; GFX11-NEXT: v_not_b32_e32 v5, v4
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX11-NEXT: v_and_b32_e32 v4, 63, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX6-LABEL: v_fshr_i64_ssv:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_and_b32_e32 v2, 63, v0
-; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v0, v0
; GFX6-NEXT: v_and_b32_e32 v0, 63, v0
; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v0
; GFX8-LABEL: v_fshr_i64_ssv:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_and_b32_e32 v2, 63, v0
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: v_and_b32_e32 v0, 63, v0
; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
; GFX9-LABEL: v_fshr_i64_ssv:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v2, 63, v0
-; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v0, v0
; GFX9-NEXT: v_and_b32_e32 v0, 63, v0
; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
; GFX9-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
;
; GFX10-LABEL: v_fshr_i64_ssv:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
+; GFX10-NEXT: v_not_b32_e32 v1, v0
; GFX10-NEXT: v_and_b32_e32 v0, 63, v0
; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
; GFX10-NEXT: v_and_b32_e32 v2, 63, v1
;
; GFX11-LABEL: v_fshr_i64_ssv:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0
+; GFX11-NEXT: v_not_b32_e32 v1, v0
; GFX11-NEXT: v_and_b32_e32 v0, 63, v0
; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v9, 63, v8
-; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX6-NEXT: v_not_b32_e32 v8, v8
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
; GFX6-NEXT: v_and_b32_e32 v8, 63, v8
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v8
; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], v9
-; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10
+; GFX6-NEXT: v_not_b32_e32 v8, v10
; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
; GFX6-NEXT: v_and_b32_e32 v4, 63, v10
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v9, 63, v8
-; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX8-NEXT: v_not_b32_e32 v8, v8
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX8-NEXT: v_and_b32_e32 v8, 63, v8
; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5]
-; GFX8-NEXT: v_xor_b32_e32 v8, -1, v10
+; GFX8-NEXT: v_not_b32_e32 v8, v10
; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: v_and_b32_e32 v4, 63, v10
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v9, 63, v8
-; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX9-NEXT: v_not_b32_e32 v8, v8
; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX9-NEXT: v_and_b32_e32 v8, 63, v8
; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5]
-; GFX9-NEXT: v_xor_b32_e32 v8, -1, v10
+; GFX9-NEXT: v_not_b32_e32 v8, v10
; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX9-NEXT: v_or_b32_e32 v0, v0, v4
; GFX9-NEXT: v_and_b32_e32 v4, 63, v10
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_xor_b32_e32 v9, -1, v8
-; GFX10-NEXT: v_xor_b32_e32 v11, -1, v10
+; GFX10-NEXT: v_not_b32_e32 v9, v8
+; GFX10-NEXT: v_not_b32_e32 v11, v10
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX10-NEXT: v_and_b32_e32 v8, 63, v8
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_xor_b32_e32 v9, -1, v8
-; GFX11-NEXT: v_xor_b32_e32 v11, -1, v10
+; GFX11-NEXT: v_not_b32_e32 v9, v8
+; GFX11-NEXT: v_not_b32_e32 v11, v10
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX11-NEXT: v_and_b32_e32 v8, 63, v8
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v14, 0x7f, v8
-; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX6-NEXT: v_not_b32_e32 v8, v8
; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
; GFX6-NEXT: v_and_b32_e32 v15, 0x7f, v8
; GFX6-NEXT: v_lshl_b64 v[8:9], v[0:1], 1
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v14, 0x7f, v8
-; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX8-NEXT: v_not_b32_e32 v8, v8
; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX8-NEXT: v_and_b32_e32 v15, 0x7f, v8
; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1]
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v14, 0x7f, v8
-; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8
+; GFX9-NEXT: v_not_b32_e32 v8, v8
; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX9-NEXT: v_and_b32_e32 v15, 0x7f, v8
; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1]
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_xor_b32_e32 v9, -1, v8
+; GFX10-NEXT: v_not_b32_e32 v9, v8
; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX10-NEXT: v_lshrrev_b32_e32 v10, 31, v1
; GFX10-NEXT: v_and_b32_e32 v19, 0x7f, v8
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_xor_b32_e32 v9, -1, v8
+; GFX11-NEXT: v_not_b32_e32 v9, v8
; GFX11-NEXT: v_lshrrev_b32_e32 v10, 31, v1
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX6-LABEL: v_fshr_i128_ssv:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_and_b32_e32 v6, 0x7f, v0
-; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v0, v0
; GFX6-NEXT: s_mov_b32 s9, 0
; GFX6-NEXT: v_and_b32_e32 v7, 0x7f, v0
; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
; GFX8-LABEL: v_fshr_i128_ssv:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_and_b32_e32 v6, 0x7f, v0
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: s_mov_b32 s9, 0
; GFX8-NEXT: v_and_b32_e32 v7, 0x7f, v0
; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
; GFX9-LABEL: v_fshr_i128_ssv:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v6, 0x7f, v0
-; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v0, v0
; GFX9-NEXT: s_mov_b32 s9, 0
; GFX9-NEXT: v_and_b32_e32 v7, 0x7f, v0
; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
;
; GFX10-LABEL: v_fshr_i128_ssv:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
+; GFX10-NEXT: v_not_b32_e32 v1, v0
; GFX10-NEXT: v_and_b32_e32 v13, 0x7f, v0
; GFX10-NEXT: s_mov_b32 s9, 0
; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
;
; GFX11-LABEL: v_fshr_i128_ssv:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0
+; GFX11-NEXT: v_not_b32_e32 v1, v0
; GFX11-NEXT: s_lshr_b32 s8, s1, 31
; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
; GFX11-NEXT: s_mov_b32 s9, 0
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v23, 0x7f, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v16
+; GFX6-NEXT: v_not_b32_e32 v16, v16
; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
; GFX6-NEXT: v_and_b32_e32 v24, 0x7f, v16
; GFX6-NEXT: v_lshl_b64 v[16:17], v[0:1], 1
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
-; GFX6-NEXT: v_xor_b32_e32 v8, -1, v20
+; GFX6-NEXT: v_not_b32_e32 v8, v20
; GFX6-NEXT: v_lshl_b64 v[6:7], v[6:7], 1
; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
; GFX6-NEXT: v_or_b32_e32 v3, v19, v3
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v23, 0x7f, v16
-; GFX8-NEXT: v_xor_b32_e32 v16, -1, v16
+; GFX8-NEXT: v_not_b32_e32 v16, v16
; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX8-NEXT: v_and_b32_e32 v24, 0x7f, v16
; GFX8-NEXT: v_lshlrev_b64 v[16:17], 1, v[0:1]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
-; GFX8-NEXT: v_xor_b32_e32 v8, -1, v20
+; GFX8-NEXT: v_not_b32_e32 v8, v20
; GFX8-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
; GFX8-NEXT: v_or_b32_e32 v3, v19, v3
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v23, 0x7f, v16
-; GFX9-NEXT: v_xor_b32_e32 v16, -1, v16
+; GFX9-NEXT: v_not_b32_e32 v16, v16
; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX9-NEXT: v_and_b32_e32 v24, 0x7f, v16
; GFX9-NEXT: v_lshlrev_b64 v[16:17], 1, v[0:1]
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
-; GFX9-NEXT: v_xor_b32_e32 v8, -1, v20
+; GFX9-NEXT: v_not_b32_e32 v8, v20
; GFX9-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
; GFX9-NEXT: v_or_b32_e32 v3, v19, v3
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_xor_b32_e32 v17, -1, v16
+; GFX10-NEXT: v_not_b32_e32 v17, v16
; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX10-NEXT: v_and_b32_e32 v26, 0x7f, v16
; GFX10-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
; GFX10-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26
; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v16, s4
-; GFX10-NEXT: v_xor_b32_e32 v16, -1, v20
+; GFX10-NEXT: v_not_b32_e32 v16, v20
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v17, s4
; GFX10-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11]
; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_xor_b32_e32 v17, -1, v16
+; GFX11-NEXT: v_not_b32_e32 v17, v16
; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
; GFX11-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_or_b32_e32 v16, v16, v18
; GFX11-NEXT: v_or_b32_e32 v17, v17, v19
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v16, s0
-; GFX11-NEXT: v_xor_b32_e32 v16, -1, v20
+; GFX11-NEXT: v_not_b32_e32 v16, v20
; GFX11-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v17, s0
; GFX9-NEXT: s_and_b32 s2, s4, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e64 v2, v0, s2
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s1
-; GFX9-NEXT: v_xor_b32_e32 v3, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v3, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_and_b32 s2, s4, 0xffff
; GFX8-NEXT: v_lshlrev_b32_e64 v2, v0, s2
; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s1
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v3, s0, v0
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: s_and_b32 s1, s4, 0xffff
; GFX7-NEXT: v_lshl_b32_e32 v2, s1, v0
; GFX7-NEXT: v_lshl_b32_e32 v0, 0xffff, v0
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v3, s0, v0
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX10-NEXT: v_lshlrev_b32_e64 v1, v0, 0xffff
; GFX10-NEXT: v_lshlrev_b32_e64 v2, v0, s1
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v1
+; GFX10-NEXT: v_not_b32_e32 v3, v1
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_lshlrev_b32_e64 v1, v0, 0xffff
; GFX11-NEXT: v_lshlrev_b32_e64 v2, v0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_xor_b32_e32 v3, -1, v1
+; GFX11-NEXT: v_not_b32_e32 v3, v1
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s1, 0xffff
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v1, s1
-; GFX9-NEXT: v_xor_b32_e32 v3, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v3, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s1, 0xffff
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_e64 v0, v1, s1
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v3, s0, v0
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v2, v1, v0
; GFX7-NEXT: v_lshl_b32_e32 v0, 0xffff, v1
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v3, s0, v0
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v2, v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_and_or_b32 v2, s0, v2, v3
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v3, v1, v0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
-; GFX11-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v2, v2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_and_or_b32 v2, s0, v2, v3
; GFX9-NEXT: s_and_b32 s1, s2, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e64 v2, v0, s1
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s0
-; GFX9-NEXT: v_xor_b32_e32 v4, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v4, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 4, v1
; GFX8-NEXT: v_lshlrev_b32_e64 v2, v1, s1
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v3, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1
; GFX7-NEXT: v_lshl_b32_e32 v2, s0, v1
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v3, v0, v1
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX10-NEXT: v_lshlrev_b32_e64 v1, v0, 0xffff
; GFX10-NEXT: v_lshlrev_b32_e64 v2, v0, s0
-; GFX10-NEXT: v_xor_b32_e32 v4, -1, v1
+; GFX10-NEXT: v_not_b32_e32 v4, v1
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshlrev_b32_e64 v1, v0, 0xffff
; GFX11-NEXT: v_lshlrev_b32_e64 v2, v0, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_xor_b32_e32 v4, -1, v1
+; GFX11-NEXT: v_not_b32_e32 v4, v1
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s0, 0xffff
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s0
-; GFX9-NEXT: v_xor_b32_e32 v3, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v3, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 4, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v3, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v2, v1, v2
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v3, v0, v1
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX10-NEXT: v_lshlrev_b32_e64 v1, v0, 0xffff
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v1
+; GFX10-NEXT: v_not_b32_e32 v3, v1
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v3, v0, v1
; GFX11-NEXT: v_mov_b32_e32 v1, 0
-; GFX11-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v2, v2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_and_or_b32 v2, v4, v2, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX9-NEXT: v_lshlrev_b32_e64 v3, v0, s3
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s2
-; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v0, v0
; GFX9-NEXT: v_and_or_b32 v4, v1, v0, v3
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e64 v3, v0, s3
; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s2
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: v_and_b32_e32 v0, v1, v0
; GFX8-NEXT: v_or_b32_e32 v4, v0, v3
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX7-NEXT: v_lshl_b32_e32 v3, s2, v0
; GFX7-NEXT: v_lshl_b32_e32 v0, 0xffff, v0
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
; GFX7-NEXT: v_and_b32_e32 v0, v1, v0
; GFX7-NEXT: v_or_b32_e32 v4, v0, v3
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
; GFX10-NEXT: v_lshlrev_b32_e64 v2, v1, 0xffff
; GFX10-NEXT: v_lshlrev_b32_e64 v3, v1, s2
-; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v2, v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s1
; GFX10-NEXT: v_cndmask_b32_e32 v5, s0, v0, vcc_lo
; GFX11-NEXT: v_lshlrev_b32_e64 v3, v1, s2
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v4
-; GFX11-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v2, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_and_or_b32 v5, v5, v2, v3
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s2
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX9-NEXT: v_not_b32_e32 v1, v1
; GFX9-NEXT: v_and_or_b32 v4, v3, v1, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s2
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: v_and_b32_e32 v1, v3, v1
; GFX8-NEXT: v_or_b32_e32 v4, v1, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1
; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: v_and_b32_e32 v1, v3, v1
; GFX7-NEXT: v_or_b32_e32 v4, v1, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
; GFX10-NEXT: v_lshlrev_b32_e64 v3, v2, 0xffff
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v3
+; GFX10-NEXT: v_not_b32_e32 v3, v3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: v_cndmask_b32_e32 v5, s0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_lshlrev_b32_e64 v3, v2, 0xffff
; GFX11-NEXT: v_lshlrev_b32_e32 v2, v2, v0
-; GFX11-NEXT: v_xor_b32_e32 v3, -1, v3
+; GFX11-NEXT: v_not_b32_e32 v3, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_and_or_b32 v5, v5, v3, v2
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX9-NEXT: v_lshlrev_b32_e64 v6, v2, s1
; GFX9-NEXT: v_lshlrev_b32_e64 v2, v2, s0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5
-; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX9-NEXT: v_not_b32_e32 v2, v2
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v5
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: v_lshlrev_b32_e64 v6, v2, s1
; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5
-; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX8-NEXT: v_not_b32_e32 v2, v2
; GFX8-NEXT: v_mov_b32_e32 v3, 0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v5
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: v_lshl_b32_e32 v6, s0, v2
; GFX7-NEXT: v_lshl_b32_e32 v2, 0xffff, v2
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5
-; GFX7-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX7-NEXT: v_not_b32_e32 v2, v2
; GFX7-NEXT: v_mov_b32_e32 v3, 0
; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v5
; GFX7-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: v_lshlrev_b32_e64 v4, v3, 0xffff
; GFX10-NEXT: v_lshlrev_b32_e64 v2, v3, s0
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v5
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v4
+; GFX10-NEXT: v_not_b32_e32 v3, v4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc_lo
; GFX10-NEXT: v_and_or_b32 v4, v4, v3, v2
; GFX11-NEXT: v_lshlrev_b32_e64 v4, v3, 0xffff
; GFX11-NEXT: v_lshlrev_b32_e64 v2, v3, s0
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v5
-; GFX11-NEXT: v_xor_b32_e32 v3, -1, v4
+; GFX11-NEXT: v_not_b32_e32 v3, v4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9-NEXT: v_lshlrev_b32_e64 v3, v3, s0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
-; GFX9-NEXT: v_xor_b32_e32 v3, -1, v3
+; GFX9-NEXT: v_not_b32_e32 v3, v3
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6
; GFX9-NEXT: v_mov_b32_e32 v5, 0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_e64 v3, v3, s0
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
-; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3
+; GFX8-NEXT: v_not_b32_e32 v3, v3
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6
; GFX8-NEXT: v_mov_b32_e32 v5, 0
; GFX7-NEXT: v_lshlrev_b32_e32 v2, v3, v2
; GFX7-NEXT: v_lshl_b32_e32 v3, 0xffff, v3
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
-; GFX7-NEXT: v_xor_b32_e32 v3, -1, v3
+; GFX7-NEXT: v_not_b32_e32 v3, v3
; GFX7-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6
; GFX7-NEXT: v_mov_b32_e32 v5, 0
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v6
; GFX10-NEXT: v_lshlrev_b32_e64 v5, v4, 0xffff
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v5
+; GFX10-NEXT: v_not_b32_e32 v3, v5
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc_lo
; GFX10-NEXT: v_and_or_b32 v4, v4, v3, v2
; GFX11-NEXT: v_lshlrev_b32_e64 v5, v4, 0xffff
; GFX11-NEXT: v_lshlrev_b32_e32 v2, v4, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_xor_b32_e32 v3, -1, v5
+; GFX11-NEXT: v_not_b32_e32 v3, v5
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc_lo
; GFX11-NEXT: v_and_or_b32 v4, v4, v3, v2
; GFX9-NEXT: v_lshlrev_b32_e64 v2, v0, s4
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s5
; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3]
-; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v0, v0
; GFX9-NEXT: v_and_or_b32 v6, v1, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX8-NEXT: v_lshlrev_b32_e64 v2, v0, s4
; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s5
; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3]
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: v_and_b32_e32 v0, v1, v0
; GFX8-NEXT: v_or_b32_e32 v6, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: v_lshl_b32_e32 v2, s4, v0
; GFX7-NEXT: v_lshl_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3]
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
; GFX7-NEXT: v_and_b32_e32 v0, v1, v0
; GFX7-NEXT: v_or_b32_e32 v5, v0, v2
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX10-NEXT: v_lshlrev_b32_e64 v2, v1, 0xffff
; GFX10-NEXT: v_lshlrev_b32_e64 v4, v1, s1
; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v6
-; GFX10-NEXT: v_xor_b32_e32 v5, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v5, v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s9
; GFX10-NEXT: v_cndmask_b32_e32 v0, s8, v0, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 3, v6
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s10, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_xor_b32_e32 v5, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v5, v2
; GFX11-NEXT: v_cndmask_b32_e64 v7, v0, s11, s1
; GFX11-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9
; GFX11-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v3, s11
; GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s8
; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[2:3]
-; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX9-NEXT: v_not_b32_e32 v1, v1
; GFX9-NEXT: v_and_or_b32 v6, v2, v1, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s8
; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[2:3]
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: v_and_b32_e32 v1, v2, v1
; GFX8-NEXT: v_or_b32_e32 v6, v1, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1
; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[2:3]
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: v_and_b32_e32 v1, v2, v1
; GFX7-NEXT: v_or_b32_e32 v5, v1, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v6
; GFX10-NEXT: v_lshlrev_b32_e64 v3, v2, 0xffff
; GFX10-NEXT: v_lshlrev_b32_sdwa v4, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX10-NEXT: v_xor_b32_e32 v5, -1, v3
+; GFX10-NEXT: v_not_b32_e32 v5, v3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, s5
; GFX10-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo
; GFX11-NEXT: v_lshlrev_b32_e32 v4, v2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, s0
-; GFX11-NEXT: v_xor_b32_e32 v5, -1, v3
+; GFX11-NEXT: v_not_b32_e32 v5, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e64 v7, v1, s7, s1
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
-; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX9-NEXT: v_not_b32_e32 v1, v1
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX9-NEXT: v_mov_b32_e32 v8, 0
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: v_mov_b32_e32 v7, 0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX8-NEXT: v_mov_b32_e32 v8, 0
; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1
; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX7-NEXT: s_mov_b32 s10, -1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshlrev_b32_e64 v7, v0, 0xffff
; GFX10-NEXT: v_lshlrev_b32_e64 v0, v0, s1
; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v1
-; GFX10-NEXT: v_xor_b32_e32 v7, -1, v7
+; GFX10-NEXT: v_not_b32_e32 v7, v7
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v5, s0
; GFX11-NEXT: v_lshlrev_b32_e64 v7, v0, 0xffff
; GFX11-NEXT: v_lshlrev_b32_e64 v0, v0, s1
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 3, v1
-; GFX11-NEXT: v_xor_b32_e32 v7, -1, v7
+; GFX11-NEXT: v_not_b32_e32 v7, v7
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
-; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX9-NEXT: v_not_b32_e32 v1, v1
; GFX9-NEXT: v_mov_b32_e32 v8, 0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX9-NEXT: v_mov_b32_e32 v9, 0
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: v_mov_b32_e32 v8, 0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX8-NEXT: v_mov_b32_e32 v9, 0
; GFX7-NEXT: v_lshlrev_b32_e32 v2, v1, v2
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1
; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX7-NEXT: s_mov_b32 s10, -1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v1
; GFX10-NEXT: v_lshlrev_b32_e64 v8, v0, 0xffff
; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX10-NEXT: v_xor_b32_e32 v2, -1, v8
+; GFX10-NEXT: v_not_b32_e32 v2, v8
; GFX10-NEXT: v_mov_b32_e32 v8, 0
; GFX10-NEXT: v_mov_b32_e32 v9, 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1
-; GFX11-NEXT: v_xor_b32_e32 v2, -1, v8
+; GFX11-NEXT: v_not_b32_e32 v2, v8
; GFX11-NEXT: v_mov_b32_e32 v8, 0
; GFX11-NEXT: v_mov_b32_e32 v9, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX9-NEXT: v_lshlrev_b32_e64 v2, v0, s4
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s5
; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[10:11]
-; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v0, v0
; GFX9-NEXT: v_and_or_b32 v9, v1, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: v_mov_b32_e32 v1, s17
; GFX8-NEXT: v_lshlrev_b32_e64 v2, v0, s4
; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s5
; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[10:11]
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: v_and_b32_e32 v0, v1, v0
; GFX8-NEXT: v_or_b32_e32 v9, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v0, s16
; GFX7-NEXT: v_lshl_b32_e32 v2, s4, v0
; GFX7-NEXT: v_lshl_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[10:11]
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
; GFX7-NEXT: v_and_b32_e32 v0, v1, v0
; GFX7-NEXT: v_or_b32_e32 v9, v0, v2
; GFX7-NEXT: v_mov_b32_e32 v0, s16
; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 7, v12
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, s9
-; GFX10-NEXT: v_xor_b32_e32 v9, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v9, v2
; GFX10-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s10, s0
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s11, s1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v1, s9
; GFX11-NEXT: v_cmp_eq_u32_e64 s5, 7, v12
-; GFX11-NEXT: v_xor_b32_e32 v9, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v9, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v1, s8, v1, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s10, s0
; GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s20
; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX9-NEXT: v_not_b32_e32 v1, v1
; GFX9-NEXT: v_and_or_b32 v9, v2, v1, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: v_mov_b32_e32 v1, s13
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s20
; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: v_and_b32_e32 v1, v2, v1
; GFX8-NEXT: v_or_b32_e32 v9, v1, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s12
; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1
; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11]
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: v_and_b32_e32 v1, v2, v1
; GFX7-NEXT: v_or_b32_e32 v9, v1, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s12
; GFX10-NEXT: v_lshlrev_b32_sdwa v8, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v2, s9
-; GFX10-NEXT: v_xor_b32_e32 v9, -1, v3
+; GFX10-NEXT: v_not_b32_e32 v9, v3
; GFX10-NEXT: v_cndmask_b32_e32 v2, s8, v2, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s10, s0
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, s11, s1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v2, s9
; GFX11-NEXT: v_lshlrev_b32_e32 v8, v1, v0
-; GFX11-NEXT: v_xor_b32_e32 v9, -1, v3
+; GFX11-NEXT: v_not_b32_e32 v9, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v2, s8, v2, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, v2, s10, s0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0
-; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX9-NEXT: v_not_b32_e32 v1, v1
; GFX9-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_cndmask_b32_e32 v11, v3, v4, vcc
; GFX8-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0
; GFX8-NEXT: s_waitcnt vmcnt(1)
; GFX8-NEXT: v_cndmask_b32_e32 v11, v3, v4, vcc
; GFX7-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1
; GFX7-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0
; GFX7-NEXT: s_mov_b32 s18, -1
; GFX7-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_lshlrev_b32_e64 v11, v2, 0xffff
; GFX10-NEXT: v_lshlrev_b32_e64 v2, v2, s5
; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 7, v0
-; GFX10-NEXT: v_xor_b32_e32 v11, -1, v11
+; GFX10-NEXT: v_not_b32_e32 v11, v11
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0
; GFX11-NEXT: v_lshlrev_b32_e64 v11, v2, 0xffff
; GFX11-NEXT: v_lshlrev_b32_e64 v2, v2, s5
; GFX11-NEXT: v_cmp_eq_u32_e64 s5, 7, v0
-; GFX11-NEXT: v_xor_b32_e32 v11, -1, v11
+; GFX11-NEXT: v_not_b32_e32 v11, v11
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0
-; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX9-NEXT: v_not_b32_e32 v1, v1
; GFX9-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
; GFX8-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0
; GFX8-NEXT: s_waitcnt vmcnt(1)
; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
; GFX7-NEXT: v_lshlrev_b32_e32 v2, v1, v2
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1
; GFX7-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0
; GFX7-NEXT: s_mov_b32 s18, -1
; GFX7-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v0
; GFX10-NEXT: v_lshlrev_b32_e64 v12, v3, 0xffff
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v12
+; GFX10-NEXT: v_not_b32_e32 v3, v12
; GFX10-NEXT: v_mov_b32_e32 v12, 0
; GFX10-NEXT: v_mov_b32_e32 v13, 0
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_lshlrev_b32_e64 v12, v3, 0xffff
; GFX11-NEXT: v_lshlrev_b32_e32 v2, v3, v2
-; GFX11-NEXT: v_xor_b32_e32 v3, -1, v12
+; GFX11-NEXT: v_not_b32_e32 v3, v12
; GFX11-NEXT: v_mov_b32_e32 v12, 0
; GFX11-NEXT: v_mov_b32_e32 v13, 0
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: s_and_b32 s2, s4, 0xff
; GFX9-NEXT: v_lshlrev_b32_e64 v2, v0, s2
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s1
-; GFX9-NEXT: v_xor_b32_e32 v3, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v3, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_and_b32 s2, s4, 0xff
; GFX8-NEXT: v_lshlrev_b32_e64 v2, v0, s2
; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s1
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v3, s0, v0
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: s_and_b32 s1, s4, 0xff
; GFX7-NEXT: v_lshl_b32_e32 v1, s1, v0
; GFX7-NEXT: v_lshl_b32_e32 v0, 0xff, v0
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v0, s0, v0
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX10-NEXT: v_lshlrev_b32_e64 v1, v0, 0xff
; GFX10-NEXT: v_lshlrev_b32_e64 v2, v0, s1
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v1
+; GFX10-NEXT: v_not_b32_e32 v3, v1
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_lshlrev_b32_e64 v1, v0, 0xff
; GFX11-NEXT: v_lshlrev_b32_e64 v2, v0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_xor_b32_e32 v3, -1, v1
+; GFX11-NEXT: v_not_b32_e32 v3, v1
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_movk_i32 s1, 0xff
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v1, s1
-; GFX9-NEXT: v_xor_b32_e32 v3, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v3, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_movk_i32 s1, 0xff
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshlrev_b32_e64 v0, v1, s1
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v3, s0, v0
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: v_and_b32_e32 v0, 0xff, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xff, v1
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v1, s0, v1
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v2, v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_and_or_b32 v2, s0, v2, v3
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v3, v1, v0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
-; GFX11-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v2, v2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_and_or_b32 v2, s0, v2, v3
; GFX9-NEXT: s_and_b32 s1, s2, 0xff
; GFX9-NEXT: v_lshlrev_b32_e64 v2, v0, s1
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s0
-; GFX9-NEXT: v_xor_b32_e32 v4, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v4, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v1
; GFX8-NEXT: v_lshlrev_b32_e64 v2, v1, s1
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v3, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v1
; GFX7-NEXT: v_lshl_b32_e32 v2, s0, v1
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xff, v1
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: s_mov_b32 s6, -1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX10-NEXT: v_lshlrev_b32_e64 v1, v0, 0xff
; GFX10-NEXT: v_lshlrev_b32_e64 v2, v0, s0
-; GFX10-NEXT: v_xor_b32_e32 v4, -1, v1
+; GFX10-NEXT: v_not_b32_e32 v4, v1
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshlrev_b32_e64 v1, v0, 0xff
; GFX11-NEXT: v_lshlrev_b32_e64 v2, v0, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_xor_b32_e32 v4, -1, v1
+; GFX11-NEXT: v_not_b32_e32 v4, v1
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_movk_i32 s0, 0xff
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s0
-; GFX9-NEXT: v_xor_b32_e32 v3, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v3, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 3, v1
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_and_b32_e32 v3, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v2, v1, v2
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xff, v1
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX10-NEXT: v_lshlrev_b32_e64 v1, v0, 0xff
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v1
+; GFX10-NEXT: v_not_b32_e32 v3, v1
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v3, v0, v1
; GFX11-NEXT: v_mov_b32_e32 v1, 0
-; GFX11-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v2, v2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_and_or_b32 v2, v4, v2, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX9-NEXT: v_lshlrev_b32_e64 v3, v0, s3
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s2
-; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v0, v0
; GFX9-NEXT: v_and_or_b32 v4, v1, v0, v3
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e64 v3, v0, s3
; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s2
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: v_and_b32_e32 v0, v1, v0
; GFX8-NEXT: v_or_b32_e32 v4, v0, v3
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX7-NEXT: v_lshl_b32_e32 v3, s2, v0
; GFX7-NEXT: v_lshl_b32_e32 v0, 0xff, v0
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
; GFX7-NEXT: v_and_b32_e32 v0, v1, v0
; GFX7-NEXT: v_or_b32_e32 v3, v0, v3
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
; GFX10-NEXT: v_lshlrev_b32_e64 v2, v1, 0xff
; GFX10-NEXT: v_lshlrev_b32_e64 v3, v1, s2
-; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v2, v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s1
; GFX10-NEXT: v_cndmask_b32_e32 v5, s0, v0, vcc_lo
; GFX11-NEXT: v_lshlrev_b32_e64 v3, v1, s2
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v4
-; GFX11-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v2, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_and_or_b32 v5, v5, v2, v3
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s2
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX9-NEXT: v_not_b32_e32 v1, v1
; GFX9-NEXT: v_and_or_b32 v4, v3, v1, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s2
; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: v_and_b32_e32 v1, v3, v1
; GFX8-NEXT: v_or_b32_e32 v4, v1, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xff, v1
; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: v_and_b32_e32 v1, v3, v1
; GFX7-NEXT: v_or_b32_e32 v3, v1, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
; GFX10-NEXT: v_lshlrev_b32_e64 v3, v2, 0xff
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v3
+; GFX10-NEXT: v_not_b32_e32 v3, v3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: v_cndmask_b32_e32 v5, s0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_lshlrev_b32_e64 v3, v2, 0xff
; GFX11-NEXT: v_lshlrev_b32_e32 v2, v2, v0
-; GFX11-NEXT: v_xor_b32_e32 v3, -1, v3
+; GFX11-NEXT: v_not_b32_e32 v3, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_and_or_b32 v5, v5, v3, v2
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX9-NEXT: v_lshlrev_b32_e64 v6, v2, s1
; GFX9-NEXT: v_lshlrev_b32_e64 v2, v2, s0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5
-; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX9-NEXT: v_not_b32_e32 v2, v2
; GFX9-NEXT: v_mov_b32_e32 v3, 0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v5
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: v_lshlrev_b32_e64 v6, v2, s1
; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5
-; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX8-NEXT: v_not_b32_e32 v2, v2
; GFX8-NEXT: v_mov_b32_e32 v3, 0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v5
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: v_lshl_b32_e32 v4, s0, v2
; GFX7-NEXT: v_lshl_b32_e32 v2, 0xff, v2
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
-; GFX7-NEXT: v_xor_b32_e32 v2, -1, v2
+; GFX7-NEXT: v_not_b32_e32 v2, v2
; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v3
; GFX7-NEXT: s_mov_b32 s6, -1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshlrev_b32_e64 v4, v3, 0xff
; GFX10-NEXT: v_lshlrev_b32_e64 v2, v3, s0
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v5
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v4
+; GFX10-NEXT: v_not_b32_e32 v3, v4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc_lo
; GFX10-NEXT: v_and_or_b32 v4, v4, v3, v2
; GFX11-NEXT: v_lshlrev_b32_e64 v4, v3, 0xff
; GFX11-NEXT: v_lshlrev_b32_e64 v2, v3, s0
; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v5
-; GFX11-NEXT: v_xor_b32_e32 v3, -1, v4
+; GFX11-NEXT: v_not_b32_e32 v3, v4
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX9-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_lshlrev_b32_e64 v3, v3, s0
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
-; GFX9-NEXT: v_xor_b32_e32 v3, -1, v3
+; GFX9-NEXT: v_not_b32_e32 v3, v3
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6
; GFX9-NEXT: v_mov_b32_e32 v5, 0
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshlrev_b32_e64 v3, v3, s0
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6
-; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3
+; GFX8-NEXT: v_not_b32_e32 v3, v3
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6
; GFX8-NEXT: v_mov_b32_e32 v5, 0
; GFX7-NEXT: v_lshlrev_b32_e32 v2, v3, v2
; GFX7-NEXT: v_lshl_b32_e32 v3, 0xff, v3
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4
-; GFX7-NEXT: v_xor_b32_e32 v3, -1, v3
+; GFX7-NEXT: v_not_b32_e32 v3, v3
; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4
; GFX7-NEXT: s_mov_b32 s6, -1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v6
; GFX10-NEXT: v_lshlrev_b32_e64 v5, v4, 0xff
; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX10-NEXT: v_xor_b32_e32 v3, -1, v5
+; GFX10-NEXT: v_not_b32_e32 v3, v5
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc_lo
; GFX10-NEXT: v_and_or_b32 v4, v4, v3, v2
; GFX11-NEXT: v_lshlrev_b32_e64 v5, v4, 0xff
; GFX11-NEXT: v_lshlrev_b32_e32 v2, v4, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_xor_b32_e32 v3, -1, v5
+; GFX11-NEXT: v_not_b32_e32 v3, v5
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc_lo
; GFX11-NEXT: v_and_or_b32 v4, v4, v3, v2
; GFX9-NEXT: v_lshlrev_b32_e64 v2, v0, s4
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s5
; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3]
-; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX9-NEXT: v_not_b32_e32 v0, v0
; GFX9-NEXT: v_and_or_b32 v6, v1, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v0, s8
; GFX9-NEXT: v_mov_b32_e32 v1, s9
; GFX8-NEXT: v_lshlrev_b32_e64 v2, v0, s4
; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s5
; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3]
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: v_and_b32_e32 v0, v1, v0
; GFX8-NEXT: v_or_b32_e32 v6, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v0, s8
; GFX7-NEXT: v_lshl_b32_e32 v2, s4, v0
; GFX7-NEXT: v_lshl_b32_e32 v0, 0xff, v0
; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3]
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
; GFX7-NEXT: v_and_b32_e32 v0, v1, v0
; GFX7-NEXT: v_or_b32_e32 v5, v0, v2
; GFX7-NEXT: v_mov_b32_e32 v0, s8
; GFX10-NEXT: v_lshlrev_b32_e64 v2, v1, 0xff
; GFX10-NEXT: v_lshlrev_b32_e64 v4, v1, s1
; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v6
-; GFX10-NEXT: v_xor_b32_e32 v5, -1, v2
+; GFX10-NEXT: v_not_b32_e32 v5, v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s9
; GFX10-NEXT: v_cndmask_b32_e32 v0, s8, v0, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 3, v6
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s10, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_xor_b32_e32 v5, -1, v2
+; GFX11-NEXT: v_not_b32_e32 v5, v2
; GFX11-NEXT: v_cndmask_b32_e64 v7, v0, s11, s1
; GFX11-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9
; GFX11-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v3, s11
; GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s8
; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[2:3]
-; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX9-NEXT: v_not_b32_e32 v1, v1
; GFX9-NEXT: v_and_or_b32 v6, v2, v1, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s8
; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[2:3]
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: v_and_b32_e32 v1, v2, v1
; GFX8-NEXT: v_or_b32_e32 v6, v1, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xff, v1
; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[2:3]
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: v_and_b32_e32 v1, v2, v1
; GFX7-NEXT: v_or_b32_e32 v5, v1, v0
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v6
; GFX10-NEXT: v_lshlrev_b32_e64 v3, v2, 0xff
; GFX10-NEXT: v_lshlrev_b32_sdwa v4, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX10-NEXT: v_xor_b32_e32 v5, -1, v3
+; GFX10-NEXT: v_not_b32_e32 v5, v3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v1, s5
; GFX10-NEXT: v_cndmask_b32_e32 v1, s4, v1, vcc_lo
; GFX11-NEXT: v_lshlrev_b32_e32 v4, v2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s6, s0
-; GFX11-NEXT: v_xor_b32_e32 v5, -1, v3
+; GFX11-NEXT: v_not_b32_e32 v5, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e64 v7, v1, s7, s1
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
-; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX9-NEXT: v_not_b32_e32 v1, v1
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX9-NEXT: v_mov_b32_e32 v8, 0
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: v_mov_b32_e32 v7, 0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX8-NEXT: v_mov_b32_e32 v8, 0
; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xff, v1
; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX7-NEXT: s_mov_b32 s10, -1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshlrev_b32_e64 v7, v0, 0xff
; GFX10-NEXT: v_lshlrev_b32_e64 v0, v0, s1
; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 3, v1
-; GFX10-NEXT: v_xor_b32_e32 v7, -1, v7
+; GFX10-NEXT: v_not_b32_e32 v7, v7
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v5, s0
; GFX11-NEXT: v_lshlrev_b32_e64 v7, v0, 0xff
; GFX11-NEXT: v_lshlrev_b32_e64 v0, v0, s1
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 3, v1
-; GFX11-NEXT: v_xor_b32_e32 v7, -1, v7
+; GFX11-NEXT: v_not_b32_e32 v7, v7
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
-; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX9-NEXT: v_not_b32_e32 v1, v1
; GFX9-NEXT: v_mov_b32_e32 v8, 0
; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX9-NEXT: v_mov_b32_e32 v9, 0
; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: v_mov_b32_e32 v8, 0
; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX8-NEXT: v_mov_b32_e32 v9, 0
; GFX7-NEXT: v_lshlrev_b32_e32 v2, v1, v2
; GFX7-NEXT: v_lshl_b32_e32 v1, 0xff, v1
; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GFX7-NEXT: s_mov_b32 s10, -1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v1
; GFX10-NEXT: v_lshlrev_b32_e64 v8, v0, 0xff
; GFX10-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
-; GFX10-NEXT: v_xor_b32_e32 v2, -1, v8
+; GFX10-NEXT: v_not_b32_e32 v2, v8
; GFX10-NEXT: v_mov_b32_e32 v8, 0
; GFX10-NEXT: v_mov_b32_e32 v9, 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, v0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e64 v3, v3, v7, s1
-; GFX11-NEXT: v_xor_b32_e32 v2, -1, v8
+; GFX11-NEXT: v_not_b32_e32 v2, v8
; GFX11-NEXT: v_mov_b32_e32 v8, 0
; GFX11-NEXT: v_mov_b32_e32 v9, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: %src0:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6-NEXT: %ineg:vgpr_32, dead %4:sreg_64_xexec = V_SUB_CO_U32_e64 %zero, %src0, 0, implicit $exec
+ ; GFX6-NEXT: %ineg:vgpr_32, dead %4:sreg_64 = V_SUB_CO_U32_e64 %zero, %src0, 0, implicit $exec
; GFX6-NEXT: %smax:vgpr_32 = V_MAX_I32_e64 %src0, %ineg, implicit $exec
; GFX6-NEXT: S_ENDPGM 0, implicit %smax
; GFX9-LABEL: name: smax_neg_abs_pattern_s32_vv
; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GFX6-NEXT: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec
- ; GFX6-NEXT: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_CO_U32_e64 [[S_ADD_I32_]], %7, 0, implicit $exec
- ; GFX6-NEXT: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_CO_U32_e64 %8, [[COPY2]], 0, implicit $exec
- ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit %7, implicit %8, implicit %9
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[S_ADD_I32_]], [[V_ADD_CO_U32_e64_]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_2]], [[COPY2]], 0, implicit $exec
+ ; GFX6-NEXT: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit [[V_ADD_CO_U32_e64_]], implicit [[V_ADD_CO_U32_e64_2]], implicit [[V_ADD_CO_U32_e64_4]]
; GFX9-LABEL: name: add_s32
; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
; GFX9-NEXT: {{ $}}
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX6-NEXT: %2:vgpr_32, dead %3:sreg_64 = V_SUB_CO_U32_e64 [[COPY]], 64, 0, implicit $exec
- ; GFX6-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_SUB_CO_U32_e64 [[COPY]], 64, 0, implicit $exec
+ ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_]]
; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_v
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
- ; GFX6-NEXT: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX6-NEXT: S_ENDPGM 0, implicit %2
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_v
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX10-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec
- ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ADD_NC_U16_e64_]], 0, 16, implicit $exec
- ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ADD_NC_U16_e64_]], implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[V_SUB_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_NC_U16_e64 0, [[COPY]], 0, 64, 0, 0, implicit $exec
- ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_SUB_NC_U16_e64_]], 0, 16, implicit $exec
- ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_SUB_NC_U16_e64_]], implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s16) = G_CONSTANT i16 -64
; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
- ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def $scc
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
; WAVE32-LABEL: name: and_s1_vcc_vcc_vcc
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
- ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def $scc
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
- ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
; WAVE32-LABEL: name: and_s1_vcc_copy_to_vcc
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
- ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B64_]]
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]]
; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_1]]
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%1:vgpr(s32) = COPY $vgpr0
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
; WAVE32: liveins: $vgpr0, $sgpr0
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B32_1]]
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%1:vgpr(s32) = COPY $vgpr0
; GCN-LABEL: name: anyext_sgpr_s32_to_sgpr_s64
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY $sgpr0
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(s32) = COPY $sgpr0
bb.0:
liveins: $vgpr0
- ; GCN-LABEL: name: anyext_vgpr_s16_to_vgpr_s64
- ; GCN: liveins: $vgpr0
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
- ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s64) = G_ANYEXT %1
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc
- ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]]
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], [[COPY]], implicit-def $scc
+ ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s1) = G_TRUNC %0
%2:sgpr(s16) = G_ANYEXT %1
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], 0, 16, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]]
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s1) = G_TRUNC %0
%2:vgpr(s16) = G_ANYEXT %1
# ERR-NOT: remark
# ERR: remark: <unknown>:0:0: cannot select: %4:sgpr(s16) = G_ASHR %2:sgpr, %3:sgpr(s16) (in function: ashr_s16_s16_ss)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_s32_vv)
-# ERR-NEXT: remark: <unknown>:0:0: cannot select: %5:vgpr(s64) = G_ZEXT %4:vgpr(s16) (in function: ashr_s16_vv_zext_to_s64)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_ASHR %2:sgpr, %1:sgpr(s32) (in function: ashr_s16_s32_ss)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:sgpr, %1:vgpr(s32) (in function: ashr_s16_s32_sv)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:sgpr(s32) (in function: ashr_s16_s32_vs)
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ASHRREV_I16_e64_]], 0, 16, implicit $exec
- ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ASHRREV_I16_e64_]], implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
; GFX11-LABEL: name: ashr_s16_s16_vv_zext_to_s32
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_ASHRREV_I16_t16_e64_]], 0, 16, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_ASHRREV_I16_t16_e64_]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX8-LABEL: name: ashr_s16_vv_zext_to_s64
; GFX8: liveins: $vgpr0, $vgpr1
; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX8-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
- ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[ASHR]](s16)
- ; GFX8-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX9-LABEL: name: ashr_s16_vv_zext_to_s64
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
- ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[ASHR]](s16)
- ; GFX9-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-LABEL: name: ashr_s16_vv_zext_to_s64
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX10-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
- ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[ASHR]](s16)
- ; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX10-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX11-LABEL: name: ashr_s16_vv_zext_to_s64
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX11-NEXT: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
- ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[ASHR]](s16)
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_ASHRREV_I16_t16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_t16_e64_]], implicit $exec
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GCN-NEXT: %5:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], %5, implicit-def dead $scc
+ ; GCN-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_64_xexec = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_F32_e64_]], implicit-def $scc
; GCN-NEXT: $vcc = COPY [[S_AND_B64_]]
; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY2]], implicit-def $scc
; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+ ; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_NE_U32_e64_]], implicit-def $scc
; GCN-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_AND_B64_]], $exec, implicit-def $scc
; GCN-NEXT: $vcc = COPY [[S_AND_B64_1]]
; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1
- ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[S_MOV_B64_]], implicit-def dead $scc
+ ; GCN-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[S_MOV_B64_]], implicit-def $scc
; GCN-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_XOR_B64_]], $exec, implicit-def $scc
; GCN-NEXT: $vcc = COPY [[S_AND_B64_]]
; GCN-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
; GFX9PLUS-LABEL: name: test_build_vector_trunc_s_v2s16_zext_impdef_zext_constant
; GFX9PLUS: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GFX9PLUS-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
- ; GFX9PLUS-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[DEF]], 1048576, implicit-def $scc
- ; GFX9PLUS-NEXT: [[S_BFE_U32_1:%[0-9]+]]:sreg_32 = S_BFE_U32 [[S_MOV_B32_]], 1048576, implicit-def $scc
- ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_BFE_U32_]], [[S_BFE_U32_1]]
+ ; GFX9PLUS-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX9PLUS-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_1]], [[DEF]], implicit-def $scc
+ ; GFX9PLUS-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX9PLUS-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_2]], [[S_MOV_B32_]], implicit-def $scc
+ ; GFX9PLUS-NEXT: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_AND_B32_]], [[S_AND_B32_1]]
; GFX9PLUS-NEXT: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
%0:sgpr(s16) = G_IMPLICIT_DEF
%1:sgpr(s16) = G_CONSTANT i16 123
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY1]], 0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = G_CTPOP %0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY1]], [[V_BCNT_U32_B32_e64_]], 0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s32) = G_CTPOP %0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY1]], 0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s32) = G_CTPOP %0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], [[COPY]], implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
+ ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY]], 0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = COPY $sgpr0
%2:vgpr(s32) = G_CTPOP %1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
+ ; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def $scc
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[S_BCNT1_I32_B32_]], [[COPY1]], 0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:sgpr(s32) = G_CTPOP %0
# ERR-NOT: remark
# ERR: remark: <unknown>:0:0: cannot select: %4:sgpr(s16) = G_LSHR %2:sgpr, %3:sgpr(s16) (in function: lshr_s16_s16_ss)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:vgpr(s32) (in function: lshr_s16_s32_vv)
-# ERR-NEXT: remark: <unknown>:0:0: cannot select: %5:vgpr(s64) = G_ZEXT %4:vgpr(s16) (in function: lshr_s16_vv_zext_to_s64)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_LSHR %2:sgpr, %1:sgpr(s32) (in function: lshr_s16_s32_ss)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:sgpr, %1:vgpr(s32) (in function: lshr_s16_s32_sv)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:sgpr(s32) (in function: lshr_s16_s32_vs)
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHRREV_B16_e64_]], 0, 16, implicit $exec
- ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHRREV_B16_e64_]], implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
; GFX11-LABEL: name: lshr_s16_s16_vv_zext_to_s32
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHRREV_B16_t16_e64_]], 0, 16, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHRREV_B16_t16_e64_]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX8-LABEL: name: lshr_s16_vv_zext_to_s64
; GFX8: liveins: $vgpr0, $vgpr1
; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX8-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
- ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[LSHR]](s16)
- ; GFX8-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX9-LABEL: name: lshr_s16_vv_zext_to_s64
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
- ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[LSHR]](s16)
- ; GFX9-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-LABEL: name: lshr_s16_vv_zext_to_s64
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX10-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
- ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[LSHR]](s16)
- ; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX10-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX11-LABEL: name: lshr_s16_vv_zext_to_s64
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX11-NEXT: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16)
- ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[LSHR]](s16)
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_LSHRREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_t16_e64_]], implicit $exec
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
- ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def $scc
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]]
; WAVE32-LABEL: name: or_s1_vcc_vcc_vcc
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
- ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def $scc
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
- ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]]
; WAVE32-LABEL: name: or_s1_vcc_copy_to_vcc
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
- ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B64_]]
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]]
; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]]
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%1:vgpr(s32) = COPY $vgpr0
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_OR_B64_]]
; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
; WAVE32: liveins: $vgpr0, $sgpr0
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B32_]]
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%1:vgpr(s32) = COPY $vgpr0
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
- ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]]
; GFX9-LABEL: name: add_s32_vgpr_vgpr_vgpr
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
- ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: S_ENDPGM 0, implicit %4, implicit %3
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]], implicit [[V_ADD_CO_U32_e64_]]
; GFX9-LABEL: name: add_s32_vgpr_vgpr_vgpr_multi_use
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
- ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]]
; GFX9-LABEL: name: add_p3_vgpr_vgpr_vgpr
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
- ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 %3, [[COPY2]], 0, implicit $exec
- ; GFX8-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_ADD_CO_U32_e64_]], [[COPY2]], 0, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]]
; GFX9-LABEL: name: add_p5_vgpr_vgpr_vgpr
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
- ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], %3, 0, implicit $exec
- ; GFX8-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[V_ADD_CO_U32_e64_]], 0, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]]
; GFX9-LABEL: name: add_p3_s32_vgpr_vgpr_vgpr
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8-NEXT: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
- ; GFX8-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], %3, 0, implicit $exec
- ; GFX8-NEXT: S_ENDPGM 0, implicit %4
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[V_ADD_CO_U32_e64_]], 0, implicit $exec
+ ; GFX8-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_2]]
; GFX9-LABEL: name: add_p5_s32_vgpr_vgpr_vgpr
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-NEXT: {{ $}}
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc
- ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[S_BFE_I32_]], 1048576, implicit-def $scc
- ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]]
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], [[S_BFE_I32_]], implicit-def $scc
+ ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s1) = G_TRUNC %0
%2:sgpr(s16) = G_SEXT %1
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec
- ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_BFE_I32_e64_]], 0, 16, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]]
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_BFE_I32_e64_]], implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s1) = G_TRUNC %0
%2:vgpr(s16) = G_SEXT %1
# ERR-NOT: remark
# ERR: remark: <unknown>:0:0: cannot select: %4:sgpr(s16) = G_SHL %2:sgpr, %3:sgpr(s16) (in function: shl_s16_s16_ss)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:vgpr(s32) (in function: shl_s16_s32_vv)
-# ERR-NEXT: remark: <unknown>:0:0: cannot select: %5:vgpr(s64) = G_ZEXT %4:vgpr(s16) (in function: shl_s16_vv_zext_to_s64)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_SHL %2:sgpr, %1:sgpr(s32) (in function: shl_s16_s32_ss)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_SHL %2:sgpr, %1:vgpr(s32) (in function: shl_s16_s32_sv)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:sgpr(s32) (in function: shl_s16_s32_vs)
; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHLREV_B16_e64_]], 0, 16, implicit $exec
- ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHLREV_B16_e64_]], implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
; GFX11-LABEL: name: shl_s16_s16_vv_zext_to_s32
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
- ; GFX11-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_LSHLREV_B16_t16_e64_]], 0, 16, implicit $exec
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_BFE_U32_e64_]]
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[V_LSHLREV_B16_t16_e64_]], implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX8-LABEL: name: shl_s16_vv_zext_to_s64
; GFX8: liveins: $vgpr0, $vgpr1
; GFX8-NEXT: {{ $}}
- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX8-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
- ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[SHL]](s16)
- ; GFX8-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec
+ ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX9-LABEL: name: shl_s16_vv_zext_to_s64
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
- ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[SHL]](s16)
- ; GFX9-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX10-LABEL: name: shl_s16_vv_zext_to_s64
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10-NEXT: {{ $}}
- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX10-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
- ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[SHL]](s16)
- ; GFX10-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX10-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX10-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
; GFX11-LABEL: name: shl_s16_vv_zext_to_s64
; GFX11: liveins: $vgpr0, $vgpr1
; GFX11-NEXT: {{ $}}
- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
- ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
- ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX11-NEXT: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
- ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[SHL]](s16)
- ; GFX11-NEXT: S_ENDPGM 0, implicit [[ZEXT]](s64)
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX11-NEXT: [[V_LSHLREV_B16_t16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_t16_e64 [[COPY1]], [[COPY]], implicit $exec
+ ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GFX11-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_t16_e64_]], implicit $exec
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX11-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(s16) = G_TRUNC %0
; GFX6-LABEL: name: store_atomic_global_s32
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
- ; GFX6-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p1) :: (store monotonic (s32), addrspace 1)
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1)
; GFX7-LABEL: name: store_atomic_global_s32
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7-NEXT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s32), addrspace 1)
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store monotonic (s32), addrspace 1)
; GFX7-FLAT-LABEL: name: store_atomic_global_s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT-NEXT: {{ $}}
; GFX6-LABEL: name: store_atomic_global_s64
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX6-NEXT: {{ $}}
- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
- ; GFX6-NEXT: G_STORE [[COPY1]](s64), [[COPY]](p1) :: (store monotonic (s64), addrspace 1)
+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1)
; GFX7-LABEL: name: store_atomic_global_s64
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
- ; GFX7-NEXT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic (s64), addrspace 1)
+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7-NEXT: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store monotonic (s64), addrspace 1)
; GFX7-FLAT-LABEL: name: store_atomic_global_s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT-NEXT: {{ $}}
; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GFX6-NEXT: %7:vgpr_32, dead %12:sreg_64_xexec = V_SUB_CO_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec
- ; GFX6-NEXT: %8:vgpr_32, dead %11:sreg_64_xexec = V_SUB_CO_U32_e64 [[S_SUB_I32_]], %7, 0, implicit $exec
- ; GFX6-NEXT: %9:vgpr_32, dead %10:sreg_64_xexec = V_SUB_CO_U32_e64 %8, [[COPY2]], 0, implicit $exec
- ; GFX6-NEXT: S_ENDPGM 0, implicit %9
+ ; GFX6-NEXT: [[V_SUB_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_SUB_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_SUB_CO_U32_e64 [[COPY2]], [[S_SUB_I32_]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_SUB_CO_U32_e64_2:%[0-9]+]]:vgpr_32, dead [[V_SUB_CO_U32_e64_3:%[0-9]+]]:sreg_64 = V_SUB_CO_U32_e64 [[S_SUB_I32_]], [[V_SUB_CO_U32_e64_]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_SUB_CO_U32_e64_4:%[0-9]+]]:vgpr_32, dead [[V_SUB_CO_U32_e64_5:%[0-9]+]]:sreg_64 = V_SUB_CO_U32_e64 [[V_SUB_CO_U32_e64_2]], [[COPY2]], 0, implicit $exec
+ ; GFX6-NEXT: S_ENDPGM 0, implicit [[V_SUB_CO_U32_e64_4]]
; GFX9-LABEL: name: sub_s32
; GFX9: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
; GFX9-NEXT: {{ $}}
; WAVE64-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; WAVE64-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
- ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def $scc
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]]
; WAVE32-LABEL: name: xor_s1_vcc_vcc_vcc
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; WAVE32-NEXT: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
- ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def $scc
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
- ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]]
; WAVE32-LABEL: name: xor_s1_vcc_copy_to_vcc
; WAVE32: liveins: $vgpr0, $vgpr1
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY1]], implicit $exec
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_1]], implicit $exec
- ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE32-NEXT: S_ENDPGM 0, implicit [[S_XOR_B32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B64_]]
; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY1]]
; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]]
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%1:vgpr(s32) = COPY $vgpr0
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE64-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
; WAVE64-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE64-NEXT: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE64-NEXT: S_ENDPGM 0, implicit [[S_XOR_B64_]]
; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64
; WAVE32: liveins: $vgpr0, $sgpr0
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
; WAVE32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
; WAVE32-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
- ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
+ ; WAVE32-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def $scc
; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B32_]]
; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY1]]
%1:vgpr(s32) = COPY $vgpr0
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc
- ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]]
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], [[COPY]], implicit-def $scc
+ ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s16) = G_TRUNC %0
%2:sgpr(s32) = G_ZEXT %1
; GCN-LABEL: name: zext_sgpr_s32_to_sgpr_s64
; GCN: liveins: $sgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32_xexec_hi_and_sreg_32_xm0 = COPY $sgpr0
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
%0:sgpr(s32) = COPY $sgpr0
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], 0, 16, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]]
+ ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
+ ; GCN-NEXT: $vgpr0 = COPY [[V_AND_B32_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_TRUNC %0
%2:vgpr(s32) = G_ZEXT %1
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10PLUS-LABEL: v_fdot2:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_dot2_f32_f16 v0, v0, v1, v2
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 false)
ret float %r
}
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10PLUS-LABEL: v_fdot2_neg_a:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
%r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %b, float %c, i1 false)
ret float %r
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10PLUS-LABEL: v_fdot2_neg_b:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%neg.b = fneg <2 x half> %b
%r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %neg.b, float %c, i1 false)
ret float %r
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_dot2_f32_f16 v0, v1, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10PLUS-LABEL: v_fdot2_neg_a_neg_b:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_dot2_f32_f16 v0, v1, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %b
%neg.b = fneg <2 x half> %b
%r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %neg.b, float %c, i1 false)
; GFX906-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10PLUS-LABEL: v_fdot2_neg_c:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
-; GFX10PLUS-NEXT: v_dot2_f32_f16 v0, v0, v1, v2
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%neg.c = fneg float %c
%r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %neg.c, i1 false)
ret float %r
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10PLUS-LABEL: v_fdot2_inline_literal_a:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%ret = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
ret float %ret
}
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10PLUS-LABEL: v_fdot2_inline_literal_b:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
ret float %ret
}
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, 1.0
; GFX906-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10PLUS-LABEL: v_fdot2_inline_literal_c:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_dot2_f32_f16 v0, v0, v1, 1.0
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 1.0, i1 false)
ret float %ret
}
define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) {
; GFX9-LABEL: atomic_add_i32_2d:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v1, v2, v1, s8
+; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: atomic_add_i32_3d:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v2, v2, v1, s8
+; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v1
; GFX9-NEXT: image_atomic_add v0, v[2:3], s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_3d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v2, v2, v1, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v1
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) {
; GFX9-LABEL: atomic_add_i32_cube:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v2, v2, v1, s8
+; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v1
; GFX9-NEXT: image_atomic_add v0, v[2:3], s[0:7] dmask:0x1 unorm glc a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_cube:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v2, v2, v1, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v1
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) {
; GFX9-LABEL: atomic_add_i32_1darray:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v1, v2, v1, s8
+; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_1darray:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) {
; GFX9-LABEL: atomic_add_i32_2darray:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v2, v2, v1, s8
+; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v1
; GFX9-NEXT: image_atomic_add v0, v[2:3], s[0:7] dmask:0x1 unorm glc a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_2darray:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v2, v2, v1, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v1
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) {
; GFX9-LABEL: atomic_add_i32_2dmsaa:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v2, v2, v1, s8
+; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v1
; GFX9-NEXT: image_atomic_add v0, v[2:3], s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_2dmsaa:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v2, v2, v1, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v1
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: atomic_add_i32_2darraymsaa:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v3
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v1, v2, v1, s8
-; GFX9-NEXT: v_perm_b32 v2, v4, v3, s8
+; GFX9-NEXT: v_lshl_or_b32 v2, v4, 16, v2
; GFX9-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_2darraymsaa:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
-; GFX10-NEXT: v_perm_b32 v2, v4, v3, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT: v_lshl_or_b32 v2, v4, 16, v3
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t) {
; GFX9-LABEL: atomic_add_i64_2d:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v2, v3, v2, s8
+; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: atomic_add_i64_3d:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v3, v3, v2, s8
+; GFX9-NEXT: v_lshl_or_b32 v3, v3, 16, v2
; GFX9-NEXT: image_atomic_add v[0:1], v[3:4], s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_3d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v3, v3, v2, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %face) {
; GFX9-LABEL: atomic_add_i64_cube:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v3, v3, v2, s8
+; GFX9-NEXT: v_lshl_or_b32 v3, v3, 16, v2
; GFX9-NEXT: image_atomic_add v[0:1], v[3:4], s[0:7] dmask:0x3 unorm glc a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_cube:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v3, v3, v2, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %slice) {
; GFX9-LABEL: atomic_add_i64_1darray:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v2, v3, v2, s8
+; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_1darray:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice) {
; GFX9-LABEL: atomic_add_i64_2darray:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v3, v3, v2, s8
+; GFX9-NEXT: v_lshl_or_b32 v3, v3, 16, v2
; GFX9-NEXT: image_atomic_add v[0:1], v[3:4], s[0:7] dmask:0x3 unorm glc a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_2darray:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v3, v3, v2, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %fragid) {
; GFX9-LABEL: atomic_add_i64_2dmsaa:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v3, v3, v2, s8
+; GFX9-NEXT: v_lshl_or_b32 v3, v3, 16, v2
; GFX9-NEXT: image_atomic_add v[0:1], v[3:4], s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_2dmsaa:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v3, v3, v2, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
+; GFX10-NEXT: v_lshl_or_b32 v3, v3, 16, v2
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: atomic_add_i64_2darraymsaa:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2
+; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v4
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v2, v3, v2, s8
-; GFX9-NEXT: v_perm_b32 v3, v5, v4, s8
+; GFX9-NEXT: v_lshl_or_b32 v3, v5, 16, v3
; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_2darraymsaa:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
-; GFX10-NEXT: v_perm_b32 v3, v5, v4, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
+; GFX10-NEXT: v_lshl_or_b32 v3, v5, 16, v4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_perm_b32 v0, v1, v0, s12
+; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
-; GFX10NSA-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10NSA-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10NSA-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_perm_b32 v1, v1, v0, s12
+; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
-; GFX10NSA-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
+; GFX10NSA-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10NSA-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_perm_b32 v1, v1, v0, s12
+; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
-; GFX10NSA-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
+; GFX10NSA-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10NSA-NEXT: image_gather4 v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_perm_b32 v1, v2, v1, s12
+; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
-; GFX10NSA-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10NSA-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_perm_b32 v1, v1, v0, s12
+; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
-; GFX10NSA-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
+; GFX10NSA-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10NSA-NEXT: image_gather4_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
+; GFX9-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-NEXT: v_perm_b32 v2, v2, v4, s12
+; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_c_cl v[0:3], v[1:3], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
-; GFX10NSA-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10NSA-NEXT: image_gather4_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_perm_b32 v1, v2, v1, s12
+; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
-; GFX10NSA-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10NSA-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_perm_b32 v2, v3, v2, s12
+; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10NSA-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
-; GFX10NSA-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX10NSA-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10NSA-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
+; GFX9-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-NEXT: v_perm_b32 v2, v2, v4, s12
+; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_b_cl v[0:3], v[1:3], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
-; GFX10NSA-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10NSA-NEXT: image_gather4_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_mov_b32_e32 v3, v4
-; GFX9-NEXT: v_perm_b32 v2, v5, v2, s12
+; GFX9-NEXT: v_lshl_or_b32 v2, v5, 16, v2
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10NSA-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
-; GFX10NSA-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX10NSA-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14
; GFX10NSA-NEXT: image_gather4_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
; GFX9-LABEL: gather4_l_2d:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_perm_b32 v1, v1, v0, s12
+; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT: image_gather4_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_l_2d:
; GFX10NSA: ; %bb.0: ; %main_body
-; GFX10NSA-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
+; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
+; GFX10NSA-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s6, s8
define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
; GFX9-LABEL: gather4_c_l_2d:
; GFX9: ; %bb.0: ; %main_body
-; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
+; GFX9-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4
+; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-NEXT: v_perm_b32 v2, v2, v4, s12
+; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v0
; GFX9-NEXT: image_gather4_c_l v[0:3], v[1:3], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_l_2d:
; GFX10NSA: ; %bb.0: ; %main_body
-; GFX10NSA-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
+; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s6, s8
define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX9-LABEL: gather4_lz_2d:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_perm_b32 v0, v1, v0, s12
+; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_lz_2d:
; GFX10NSA: ; %bb.0: ; %main_body
-; GFX10NSA-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10NSA-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
+; GFX10NSA-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s6, s8
define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: gather4_c_lz_2d:
; GFX9: ; %bb.0: ; %main_body
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s12, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
+; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: v_perm_b32 v1, v2, v1, s12
+; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_lz_2d:
; GFX10NSA: ; %bb.0: ; %main_body
-; GFX10NSA-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX10NSA-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
+; GFX10NSA-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s6, s8
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX9: ; %bb.0:
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v0, v1, v0, s8
-; GFX9-NEXT: v_perm_b32 v1, v3, v2, s8
+; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX10PLUS-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10PLUS-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
+; GFX10PLUS-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10PLUS-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: v_lshl_or_b32 v10, v1, 16, v0
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-NEXT: v_perm_b32 v10, v1, v0, s8
-; GFX9-NEXT: v_perm_b32 v11, v3, v2, s8
+; GFX9-NEXT: v_lshl_or_b32 v11, v3, 16, v0
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v7, v5
; GFX9-NEXT: v_mov_b32_e32 v8, v5
; GFX9-NEXT: v_mov_b32_e32 v9, v5
; GFX9-NEXT: v_mov_b32_e32 v0, v5
+; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: v_mov_b32_e32 v1, v6
; GFX9-NEXT: v_mov_b32_e32 v2, v7
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v5, 0
-; GFX10-NEXT: v_perm_b32 v10, v1, v0, 0x5040100
-; GFX10-NEXT: v_perm_b32 v11, v3, v2, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: v_mov_b32_e32 v6, v5
; GFX10-NEXT: v_mov_b32_e32 v7, v5
; GFX10-NEXT: v_mov_b32_e32 v8, v5
; GFX10-NEXT: v_mov_b32_e32 v9, v5
+; GFX10-NEXT: v_lshl_or_b32 v10, v1, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v11, v3, 16, v2
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v5, 0
-; GFX11-NEXT: v_perm_b32 v10, v1, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v11, v3, v2, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: v_mov_b32_e32 v6, v5
; GFX11-NEXT: v_mov_b32_e32 v7, v5
; GFX11-NEXT: v_mov_b32_e32 v8, v5
; GFX11-NEXT: v_mov_b32_e32 v9, v5
+; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v0
+; GFX11-NEXT: v_lshl_or_b32 v11, v3, 16, v2
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s4, s6
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: v_lshl_or_b32 v10, v1, 16, v0
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-NEXT: v_perm_b32 v10, v1, v0, s8
-; GFX9-NEXT: v_perm_b32 v11, v3, v2, s8
+; GFX9-NEXT: v_lshl_or_b32 v11, v3, 16, v0
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v7, v5
; GFX9-NEXT: v_mov_b32_e32 v8, v5
; GFX9-NEXT: v_mov_b32_e32 v9, v5
; GFX9-NEXT: v_mov_b32_e32 v0, v5
+; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: v_mov_b32_e32 v1, v6
; GFX9-NEXT: v_mov_b32_e32 v2, v7
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v5, 0
-; GFX10-NEXT: v_perm_b32 v10, v1, v0, 0x5040100
-; GFX10-NEXT: v_perm_b32 v11, v3, v2, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: v_mov_b32_e32 v6, v5
; GFX10-NEXT: v_mov_b32_e32 v7, v5
; GFX10-NEXT: v_mov_b32_e32 v8, v5
; GFX10-NEXT: v_mov_b32_e32 v9, v5
+; GFX10-NEXT: v_lshl_or_b32 v10, v1, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v11, v3, 16, v2
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v5, 0
-; GFX11-NEXT: v_perm_b32 v10, v1, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v11, v3, v2, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: v_mov_b32_e32 v6, v5
; GFX11-NEXT: v_mov_b32_e32 v7, v5
; GFX11-NEXT: v_mov_b32_e32 v8, v5
; GFX11-NEXT: v_mov_b32_e32 v9, v5
+; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v0
+; GFX11-NEXT: v_lshl_or_b32 v11, v3, 16, v2
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s4, s6
define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: load_3d_v4f32_xyzw:
; GFX9: ; %bb.0:
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
-; GFX9-NEXT: v_perm_b32 v1, v1, v0, s8
+; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX9-NEXT: image_load v[0:3], v[1:2], s[0:7] dmask:0xf unorm a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_3d_v4f32_xyzw:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: v_perm_b32 v1, v1, v0, 0x5040100
+; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
+; GFX10PLUS-NEXT: v_lshl_or_b32 v1, v1, 16, v0
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: load_3d_v4f32_xyzw_tfe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: v_mov_b32_e32 v6, v2
-; GFX9-NEXT: v_perm_b32 v5, v1, v0, s8
+; GFX9-NEXT: v_lshl_or_b32 v5, v1, 16, v0
; GFX9-NEXT: v_mov_b32_e32 v8, v7
; GFX9-NEXT: v_mov_b32_e32 v9, v7
; GFX9-NEXT: v_mov_b32_e32 v10, v7
; GFX9-NEXT: v_mov_b32_e32 v11, v7
; GFX9-NEXT: v_mov_b32_e32 v0, v7
+; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: v_mov_b32_e32 v1, v8
; GFX9-NEXT: v_mov_b32_e32 v2, v9
; GFX10-LABEL: load_3d_v4f32_xyzw_tfe:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v7, 0
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_mov_b32_e32 v6, v2
-; GFX10-NEXT: v_perm_b32 v5, v1, v0, 0x5040100
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: v_mov_b32_e32 v8, v7
; GFX10-NEXT: v_mov_b32_e32 v9, v7
; GFX10-NEXT: v_mov_b32_e32 v10, v7
; GFX10-NEXT: v_mov_b32_e32 v11, v7
+; GFX10-NEXT: v_lshl_or_b32 v5, v1, 16, v0
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX11-LABEL: load_3d_v4f32_xyzw_tfe:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v7, 0
-; GFX11-NEXT: v_perm_b32 v5, v1, v0, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: v_mov_b32_e32 v11, v7
; GFX11-NEXT: v_mov_b32_e32 v10, v7
; GFX11-NEXT: v_mov_b32_e32 v8, v7
+; GFX11-NEXT: v_lshl_or_b32 v5, v1, 16, v0
; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s4, s6
; GFX11-NEXT: s_mov_b32 s5, s7
define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: load_3d_v4f32_xyzw_tfe_lwe:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_mov_b32 s0, s2
-; GFX9-NEXT: s_mov_b32 s2, s4
-; GFX9-NEXT: s_mov_b32 s4, s6
-; GFX9-NEXT: s_mov_b32 s6, s8
-; GFX9-NEXT: s_mov_b32 s8, 0x5040100
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_mov_b32_e32 v7, 0
; GFX9-NEXT: v_mov_b32_e32 v6, v2
-; GFX9-NEXT: v_perm_b32 v5, v1, v0, s8
+; GFX9-NEXT: v_lshl_or_b32 v5, v1, 16, v0
; GFX9-NEXT: v_mov_b32_e32 v8, v7
; GFX9-NEXT: v_mov_b32_e32 v9, v7
; GFX9-NEXT: v_mov_b32_e32 v10, v7
; GFX9-NEXT: v_mov_b32_e32 v11, v7
; GFX9-NEXT: v_mov_b32_e32 v0, v7
+; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
+; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
+; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
+; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: v_mov_b32_e32 v1, v8
; GFX9-NEXT: v_mov_b32_e32 v2, v9
; GFX10-LABEL: load_3d_v4f32_xyzw_tfe_lwe:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v7, 0
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_mov_b32_e32 v6, v2
-; GFX10-NEXT: v_perm_b32 v5, v1, v0, 0x5040100
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: v_mov_b32_e32 v8, v7
; GFX10-NEXT: v_mov_b32_e32 v9, v7
; GFX10-NEXT: v_mov_b32_e32 v10, v7
; GFX10-NEXT: v_mov_b32_e32 v11, v7
+; GFX10-NEXT: v_lshl_or_b32 v5, v1, 16, v0
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX11-LABEL: load_3d_v4f32_xyzw_tfe_lwe:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v7, 0
-; GFX11-NEXT: v_perm_b32 v5, v1, v0, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: v_mov_b32_e32 v11, v7
; GFX11-NEXT: v_mov_b32_e32 v10, v7
; GFX11-NEXT: v_mov_b32_e32 v8, v7
+; GFX11-NEXT: v_lshl_or_b32 v5, v1, 16, v0
; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s4, s6
; GFX11-NEXT: s_mov_b32 s5, s7
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_cd_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX10-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_c_cd_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
-; GFX10-NEXT: v_perm_b32 v2, v4, v3, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT: v_lshl_or_b32 v2, v4, 16, v3
; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_cd_cl_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX10-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
; GFX10-LABEL: sample_c_cd_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v8, v2
-; GFX10-NEXT: v_mov_b32_e32 v9, v3
; GFX10-NEXT: v_mov_b32_e32 v2, v0
-; GFX10-NEXT: v_perm_b32 v3, v8, v1, 0x5040100
-; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v1
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v3
+; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v1
; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_d_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX10-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_d_2d:
; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX11-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
; GFX10-LABEL: sample_d_3d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v9, v3
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_mov_b32_e32 v3, v2
-; GFX10-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
-; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v9, 0xffff, v9
+; GFX10-NEXT: v_lshl_or_b32 v2, v1, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v9
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_d_3d:
; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v1, v4, v3, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-NEXT: v_lshl_or_b32 v1, v4, 16, v3
; GFX11-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v1, v5, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_c_d_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
-; GFX10-NEXT: v_perm_b32 v2, v4, v3, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
+; GFX10-NEXT: v_lshl_or_b32 v2, v4, 16, v3
; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_2d:
; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
-; GFX11-NEXT: v_perm_b32 v2, v4, v3, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX11-NEXT: v_lshl_or_b32 v1, v2, 16, v1
+; GFX11-NEXT: v_lshl_or_b32 v2, v4, 16, v3
; GFX11-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
-; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX10-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_d_cl_2d:
; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX11-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v8, v2
-; GFX10-NEXT: v_mov_b32_e32 v9, v3
; GFX10-NEXT: v_mov_b32_e32 v2, v0
-; GFX10-NEXT: v_perm_b32 v3, v8, v1, 0x5040100
-; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v1
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v3
+; GFX10-NEXT: v_lshl_or_b32 v3, v8, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v4, v4, 16, v1
; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_cl_2d:
; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
-; GFX11-NEXT: v_perm_b32 v2, v4, v3, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX11-NEXT: v_lshl_or_b32 v1, v2, 16, v1
+; GFX11-NEXT: v_lshl_or_b32 v2, v4, 16, v3
; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v[6:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v9, v2
; GFX10-NEXT: v_mov_b32_e32 v10, v3
-; GFX10-NEXT: v_mov_b32_e32 v11, v4
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: v_mov_b32_e32 v3, v1
-; GFX10-NEXT: v_perm_b32 v4, v10, v9, 0x5040100
-; GFX10-NEXT: v_perm_b32 v5, v5, v11, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v4
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v9
+; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1
+; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0
; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_o_2darray_V1:
; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
-; GFX11-NEXT: v_perm_b32 v3, v5, v4, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX11-NEXT: v_lshl_or_b32 v2, v3, 16, v2
+; GFX11-NEXT: v_lshl_or_b32 v3, v5, 16, v4
; GFX11-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v[6:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v9, v2
; GFX10-NEXT: v_mov_b32_e32 v10, v3
-; GFX10-NEXT: v_mov_b32_e32 v11, v4
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: v_mov_b32_e32 v3, v1
-; GFX10-NEXT: v_perm_b32 v4, v10, v9, 0x5040100
-; GFX10-NEXT: v_perm_b32 v5, v5, v11, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v4
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v9
+; GFX10-NEXT: v_lshl_or_b32 v5, v5, 16, v1
+; GFX10-NEXT: v_lshl_or_b32 v4, v10, 16, v0
; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_o_2darray_V2:
; GFX11: ; %bb.0: ; %main_body
-; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
-; GFX11-NEXT: v_perm_b32 v3, v5, v4, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX11-NEXT: v_lshl_or_b32 v2, v3, 16, v2
+; GFX11-NEXT: v_lshl_or_b32 v3, v5, 16, v4
; GFX11-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v[6:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: image_bvh_intersect_ray_a16:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_perm_b32 v9, v5, v7, 0x5040100
-; GFX11-NEXT: v_perm_b32 v10, v5, v7, 0x7060302
-; GFX11-NEXT: v_perm_b32 v11, v6, v8, 0x5040100
-; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v0, v1, v[2:4], v[9:11]], s[0:3] a16
+; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v7
+; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v8
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_lshl_or_b32 v8, v5, 16, v9
+; GFX11-NEXT: v_perm_b32 v9, v5, v7, 0x7060302
+; GFX11-NEXT: v_lshl_or_b32 v10, v6, 16, v10
+; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v0, v1, v[2:4], v[8:10]], s[0:3] a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr)
;
; GFX11-LABEL: image_bvh64_intersect_ray_a16:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_perm_b32 v10, v6, v8, 0x5040100
-; GFX11-NEXT: v_perm_b32 v11, v6, v8, 0x7060302
-; GFX11-NEXT: v_perm_b32 v12, v7, v9, 0x5040100
-; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[0:1], v2, v[3:5], v[10:12]], s[0:3] a16
+; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v8
+; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v9
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_lshl_or_b32 v9, v6, 16, v10
+; GFX11-NEXT: v_perm_b32 v10, v6, v8, 0x7060302
+; GFX11-NEXT: v_lshl_or_b32 v11, v7, 16, v11
+; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[0:1], v2, v[3:5], v[9:11]], s[0:3] a16
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr)
; GFX11-LABEL: image_bvh_intersect_ray_a16_vgpr_descr:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_dual_mov_b32 v16, v0 :: v_dual_mov_b32 v17, v1
+; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_and_b32 v0, 0xffff, v7
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v8
; GFX11-NEXT: v_dual_mov_b32 v13, v2 :: v_dual_mov_b32 v14, v3
-; GFX11-NEXT: v_mov_b32_e32 v15, v4
-; GFX11-NEXT: v_perm_b32 v4, v5, v7, 0x5040100
-; GFX11-NEXT: v_perm_b32 v5, v5, v7, 0x7060302
-; GFX11-NEXT: v_perm_b32 v6, v6, v8, 0x5040100
; GFX11-NEXT: s_mov_b32 s1, exec_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-NEXT: v_lshl_or_b32 v4, v5, 16, v0
+; GFX11-NEXT: v_perm_b32 v5, v5, v7, 0x7060302
+; GFX11-NEXT: v_lshl_or_b32 v6, v6, 16, v1
; GFX11-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: v_readfirstlane_b32 s4, v9
; GFX11-NEXT: v_readfirstlane_b32 s5, v10
; GFX11-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_dual_mov_b32 v17, v0 :: v_dual_mov_b32 v18, v1
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v8
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v9
; GFX11-NEXT: v_dual_mov_b32 v19, v2 :: v_dual_mov_b32 v14, v3
; GFX11-NEXT: v_dual_mov_b32 v15, v4 :: v_dual_mov_b32 v16, v5
-; GFX11-NEXT: v_perm_b32 v4, v6, v8, 0x5040100
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_lshl_or_b32 v4, v6, 16, v0
; GFX11-NEXT: v_perm_b32 v5, v6, v8, 0x7060302
-; GFX11-NEXT: v_perm_b32 v6, v7, v9, 0x5040100
+; GFX11-NEXT: v_lshl_or_b32 v6, v7, 16, v1
; GFX11-NEXT: s_mov_b32 s1, exec_lo
; GFX11-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: v_readfirstlane_b32 s4, v10
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
; GFX1030-NEXT: v_lshlrev_b32_e32 v4, 2, v0
-; GFX1030-NEXT: s_movk_i32 s9, 0x4600
-; GFX1030-NEXT: s_movk_i32 s8, 0x4700
-; GFX1030-NEXT: s_bfe_u32 s8, s8, 0x100000
+; GFX1030-NEXT: v_mov_b32_e32 v5, 0x44004200
+; GFX1030-NEXT: v_mov_b32_e32 v6, 0x46004500
+; GFX1030-NEXT: v_mov_b32_e32 v7, 0x48004700
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v0, s0
; GFX1030-NEXT: v_mov_b32_e32 v1, s1
; GFX1030-NEXT: v_mov_b32_e32 v2, s2
; GFX1030-NEXT: v_mov_b32_e32 v3, s3
-; GFX1030-NEXT: s_movk_i32 s1, 0x4400
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX1030-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
-; GFX1030-NEXT: s_movk_i32 s2, 0x4200
+; GFX1030-NEXT: v_mov_b32_e32 v4, 2.0
; GFX1030-NEXT: flat_load_dword v0, v[0:1]
; GFX1030-NEXT: flat_load_dword v1, v[2:3]
-; GFX1030-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX1030-NEXT: s_movk_i32 s3, 0x4800
-; GFX1030-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX1030-NEXT: s_lshl_b32 s1, s1, 16
-; GFX1030-NEXT: s_movk_i32 s0, 0x4500
-; GFX1030-NEXT: s_or_b32 s1, s2, s1
-; GFX1030-NEXT: s_bfe_u32 s2, s9, 0x100000
-; GFX1030-NEXT: s_bfe_u32 s3, s3, 0x100000
-; GFX1030-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX1030-NEXT: s_lshl_b32 s2, s2, 16
-; GFX1030-NEXT: s_lshl_b32 s3, s3, 16
-; GFX1030-NEXT: s_or_b32 s0, s0, s2
-; GFX1030-NEXT: s_or_b32 s2, s8, s3
; GFX1030-NEXT: v_mov_b32_e32 v2, 0
; GFX1030-NEXT: v_mov_b32_e32 v3, 1.0
-; GFX1030-NEXT: v_mov_b32_e32 v4, 2.0
-; GFX1030-NEXT: v_mov_b32_e32 v5, s1
-; GFX1030-NEXT: v_mov_b32_e32 v6, s0
-; GFX1030-NEXT: v_mov_b32_e32 v7, s2
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[0:7], s[4:7] a16
; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1013: ; %bb.0:
; GFX1013-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
; GFX1013-NEXT: v_lshlrev_b32_e32 v6, 2, v0
-; GFX1013-NEXT: s_movk_i32 s9, 0x4600
-; GFX1013-NEXT: s_movk_i32 s8, 0x4700
-; GFX1013-NEXT: s_bfe_u32 s8, s8, 0x100000
+; GFX1013-NEXT: v_mov_b32_e32 v7, 0x48004700
; GFX1013-NEXT: s_waitcnt lgkmcnt(0)
; GFX1013-NEXT: v_mov_b32_e32 v0, s0
; GFX1013-NEXT: v_mov_b32_e32 v1, s1
; GFX1013-NEXT: v_mov_b32_e32 v2, s2
; GFX1013-NEXT: v_mov_b32_e32 v3, s3
-; GFX1013-NEXT: s_movk_i32 s1, 0x4400
; GFX1013-NEXT: v_add_co_u32 v4, vcc_lo, v0, v6
; GFX1013-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo
; GFX1013-NEXT: v_add_co_u32 v2, vcc_lo, v2, v6
; GFX1013-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
-; GFX1013-NEXT: s_movk_i32 s2, 0x4200
+; GFX1013-NEXT: v_mov_b32_e32 v6, 0x46004500
; GFX1013-NEXT: flat_load_dword v0, v[4:5]
; GFX1013-NEXT: flat_load_dword v1, v[2:3]
-; GFX1013-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX1013-NEXT: s_movk_i32 s3, 0x4800
-; GFX1013-NEXT: s_bfe_u32 s2, s2, 0x100000
-; GFX1013-NEXT: s_lshl_b32 s1, s1, 16
-; GFX1013-NEXT: s_movk_i32 s0, 0x4500
-; GFX1013-NEXT: s_or_b32 s1, s2, s1
-; GFX1013-NEXT: s_bfe_u32 s2, s9, 0x100000
-; GFX1013-NEXT: s_bfe_u32 s3, s3, 0x100000
-; GFX1013-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX1013-NEXT: s_lshl_b32 s2, s2, 16
-; GFX1013-NEXT: s_lshl_b32 s3, s3, 16
-; GFX1013-NEXT: s_or_b32 s0, s0, s2
-; GFX1013-NEXT: s_or_b32 s2, s8, s3
; GFX1013-NEXT: v_mov_b32_e32 v2, 0
; GFX1013-NEXT: v_mov_b32_e32 v3, 1.0
; GFX1013-NEXT: v_mov_b32_e32 v4, 2.0
-; GFX1013-NEXT: v_mov_b32_e32 v5, s1
-; GFX1013-NEXT: v_mov_b32_e32 v6, s0
-; GFX1013-NEXT: v_mov_b32_e32 v7, s2
+; GFX1013-NEXT: v_mov_b32_e32 v5, 0x44004200
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh_intersect_ray v[0:3], v[0:7], s[4:7] a16
; GFX1013-NEXT: s_waitcnt vmcnt(0)
; GFX1030-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
; GFX1030-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX1030-NEXT: s_movk_i32 s6, 0x4200
-; GFX1030-NEXT: s_movk_i32 s7, 0x4800
-; GFX1030-NEXT: s_bfe_u32 s6, s6, 0x100000
-; GFX1030-NEXT: s_movk_i32 s9, 0x4600
-; GFX1030-NEXT: s_movk_i32 s8, 0x4700
-; GFX1030-NEXT: s_bfe_u32 s7, s7, 0x100000
-; GFX1030-NEXT: s_bfe_u32 s8, s8, 0x100000
-; GFX1030-NEXT: s_lshl_b32 s7, s7, 16
; GFX1030-NEXT: v_mov_b32_e32 v3, 0
; GFX1030-NEXT: v_mov_b32_e32 v4, 1.0
; GFX1030-NEXT: v_mov_b32_e32 v5, 2.0
+; GFX1030-NEXT: v_mov_b32_e32 v6, 0x44004200
+; GFX1030-NEXT: v_mov_b32_e32 v7, 0x46004500
+; GFX1030-NEXT: v_mov_b32_e32 v8, 0x48004700
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v0, s4
; GFX1030-NEXT: v_mov_b32_e32 v1, s5
-; GFX1030-NEXT: s_movk_i32 s5, 0x4400
-; GFX1030-NEXT: s_movk_i32 s4, 0x4500
-; GFX1030-NEXT: s_bfe_u32 s5, s5, 0x100000
; GFX1030-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-; GFX1030-NEXT: s_lshl_b32 s5, s5, 16
-; GFX1030-NEXT: s_bfe_u32 s4, s4, 0x100000
-; GFX1030-NEXT: s_or_b32 s5, s6, s5
; GFX1030-NEXT: flat_load_dword v2, v[0:1]
-; GFX1030-NEXT: s_bfe_u32 s6, s9, 0x100000
; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c6
-; GFX1030-NEXT: s_lshl_b32 s6, s6, 16
; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102
-; GFX1030-NEXT: s_or_b32 s4, s4, s6
-; GFX1030-NEXT: s_or_b32 s6, s8, s7
-; GFX1030-NEXT: v_mov_b32_e32 v6, s5
-; GFX1030-NEXT: v_mov_b32_e32 v7, s4
-; GFX1030-NEXT: v_mov_b32_e32 v8, s6
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[0:3] a16
; GFX1030-NEXT: s_waitcnt vmcnt(0)
; GFX1013-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX1013-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; GFX1013-NEXT: v_lshlrev_b32_e32 v2, 2, v0
-; GFX1013-NEXT: s_movk_i32 s1, 0x4400
-; GFX1013-NEXT: s_movk_i32 s9, 0x4600
-; GFX1013-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX1013-NEXT: s_movk_i32 s0, 0x4500
-; GFX1013-NEXT: s_lshl_b32 s1, s1, 16
-; GFX1013-NEXT: s_movk_i32 s8, 0x4700
-; GFX1013-NEXT: s_bfe_u32 s0, s0, 0x100000
-; GFX1013-NEXT: s_bfe_u32 s8, s8, 0x100000
; GFX1013-NEXT: v_mov_b32_e32 v3, 0
; GFX1013-NEXT: v_mov_b32_e32 v4, 1.0
; GFX1013-NEXT: v_mov_b32_e32 v5, 2.0
+; GFX1013-NEXT: v_mov_b32_e32 v6, 0x44004200
+; GFX1013-NEXT: v_mov_b32_e32 v7, 0x46004500
+; GFX1013-NEXT: v_mov_b32_e32 v8, 0x48004700
; GFX1013-NEXT: s_waitcnt lgkmcnt(0)
; GFX1013-NEXT: v_mov_b32_e32 v0, s2
; GFX1013-NEXT: v_mov_b32_e32 v1, s3
-; GFX1013-NEXT: s_movk_i32 s2, 0x4200
-; GFX1013-NEXT: s_movk_i32 s3, 0x4800
-; GFX1013-NEXT: s_bfe_u32 s2, s2, 0x100000
; GFX1013-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX1013-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-; GFX1013-NEXT: s_or_b32 s1, s2, s1
-; GFX1013-NEXT: s_bfe_u32 s2, s9, 0x100000
-; GFX1013-NEXT: s_bfe_u32 s3, s3, 0x100000
; GFX1013-NEXT: flat_load_dword v2, v[0:1]
-; GFX1013-NEXT: s_lshl_b32 s2, s2, 16
-; GFX1013-NEXT: s_lshl_b32 s3, s3, 16
-; GFX1013-NEXT: s_or_b32 s0, s0, s2
-; GFX1013-NEXT: s_or_b32 s2, s8, s3
; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c6
; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102
-; GFX1013-NEXT: v_mov_b32_e32 v6, s1
-; GFX1013-NEXT: v_mov_b32_e32 v7, s0
-; GFX1013-NEXT: v_mov_b32_e32 v8, s2
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[4:7] a16
; GFX1013-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; GFX908-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX90A-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.3:
; GFX908-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
- ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
- ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.3:
; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; UNPACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
+ ; UNPACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; PACKED-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; PACKED-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; PACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
+ ; PACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; PACKED-NEXT: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7)
; PACKED-NEXT: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
; UNPACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; UNPACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
+ ; UNPACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; UNPACKED-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 16
; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY4]], implicit $exec
; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; PACKED-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
; PACKED-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; PACKED-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; PACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
+ ; PACKED-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; PACKED-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.2:
; PACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; CHECK-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 7)
; CHECK-NEXT: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 7)
; CHECK-NEXT: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 7)
; CHECK-NEXT: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; PACKED-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; PACKED-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: bb.3:
; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: bb.3:
; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: bb.3:
; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: bb.3:
; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: bb.3:
; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: bb.3:
; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
- ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: bb.3:
; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
- ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
- ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: bb.3:
; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: bb.3:
; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
- ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: bb.3:
; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: bb.3:
; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: bb.3:
; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: bb.3:
; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: bb.3:
; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: bb.3:
; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
- ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: bb.3:
; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: bb.3:
; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: bb.3:
; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: bb.3:
; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: bb.3:
; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: bb.3:
; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY8]], [[COPY6]], implicit $exec
- ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: bb.3:
; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
- ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: bb.3:
; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
- ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: bb.3:
; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY6]], [[COPY4]], implicit $exec
; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY7]], [[COPY5]], implicit $exec
- ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: bb.3:
; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
+ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX908-LABEL: v_sdot2:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX908-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v2, v0, v1
+; GFX908-NEXT: v_mov_b32_e32 v0, v2
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2:
;
; GFX908-LABEL: v_sdot2_sgpr_sgpr_sgpr:
; GFX908: ; %bb.0:
-; GFX908-NEXT: v_mov_b32_e32 v0, s1
-; GFX908-NEXT: v_mov_b32_e32 v1, s2
-; GFX908-NEXT: v_dot2_i32_i16 v0, s0, v0, v1
+; GFX908-NEXT: v_mov_b32_e32 v1, s1
+; GFX908-NEXT: v_mov_b32_e32 v0, s2
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v0, s0, v1
; GFX908-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_sdot2_sgpr_sgpr_sgpr:
; GFX908-LABEL: v_sdot2_inline_literal_a:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX908-NEXT: v_dot2_i32_i16 v0, 4, v0, v1 op_sel_hi:[0,1,1]
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v1, 0x40004, v0
+; GFX908-NEXT: v_mov_b32_e32 v0, v1
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_inline_literal_a:
; GFX908-LABEL: v_sdot2_inline_literal_b:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX908-NEXT: v_dot2_i32_i16 v0, v0, 4, v1 op_sel_hi:[1,0,1]
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v1, 0x40004, v0
+; GFX908-NEXT: v_mov_b32_e32 v0, v1
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_inline_literal_b:
; GFX908-LABEL: v_sdot2_inline_literal_a_b:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX908-NEXT: v_dot2_i32_i16 v0, 8, 4, v1 op_sel_hi:[0,0,1]
+; GFX908-NEXT: v_mov_b32_e32 v0, v1
+; GFX908-NEXT: v_mov_b32_e32 v1, 0x40004
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v0, 0x80008, v1
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_inline_literal_a_b:
; GFX908-LABEL: v_sdot2_inline_literal_a_b_c:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX908-NEXT: v_dot2_i32_i16 v0, 8, 4, 8 op_sel_hi:[0,0,1]
+; GFX908-NEXT: v_mov_b32_e32 v1, 0x40004
+; GFX908-NEXT: v_mov_b32_e32 v0, 8
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v0, 0x80008, v1
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_inline_literal_a_b_c:
; GFX908-LABEL: v_sdot2_inline_literal_c:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX908-NEXT: v_dot2_i32_i16 v0, v0, v1, 7
+; GFX908-NEXT: v_mov_b32_e32 v2, 7
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v2, v0, v1
+; GFX908-NEXT: v_mov_b32_e32 v0, v2
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_inline_literal_c:
; GFX908-LABEL: v_sdot2_fneg_a:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX908-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
+; GFX908-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v2, v0, v1
+; GFX908-NEXT: v_mov_b32_e32 v0, v2
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_fneg_a:
; GFX908-LABEL: v_sdot2_fneg_b:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX908-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
+; GFX908-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v2, v0, v1
+; GFX908-NEXT: v_mov_b32_e32 v0, v2
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_fneg_b:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
-; GFX908-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v2, v0, v1
+; GFX908-NEXT: v_mov_b32_e32 v0, v2
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_fnegf32_c:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX908-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v2, v0, v1
+; GFX908-NEXT: v_mov_b32_e32 v0, v2
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_fnegv2f16_c:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_alignbit_b32 v0, v0, v0, 16
-; GFX908-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v2, v0, v1
+; GFX908-NEXT: v_mov_b32_e32 v0, v2
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_shuffle10_a:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_alignbit_b32 v1, v1, v1, 16
-; GFX908-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
+; GFX908-NEXT: v_dot2c_i32_i16_e32 v2, v0, v1
+; GFX908-NEXT: v_mov_b32_e32 v0, v2
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sdot2_shuffle10_b:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_dot4_i32_i8 v0, v0, v1, v2
+; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
+; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%r = call i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 false)
ret i32 %r
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 24, v6
; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2
; GFX10-NEXT: v_or3_b32 v1, v3, v4, v5
-; GFX10-NEXT: v_dot4_i32_i8 v0, v0, v1, v8
+; GFX10-NEXT: v_dot4c_i32_i8_e32 v8, v0, v1
+; GFX10-NEXT: v_mov_b32_e32 v0, v8
; GFX10-NEXT: s_setpc_b64 s[30:31]
%a.cast = bitcast <4 x i8> %a to i32
%b.cast = bitcast <4 x i8> %b to i32
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
-; GFX10-NEXT: v_dot4_i32_i8 v0, v0, v1, v2
+; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
+; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg float %a
%cast.neg.a = bitcast float %neg.a to i32
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
-; GFX10-NEXT: v_dot4_i32_i8 v0, v0, v1, v2
+; GFX10-NEXT: v_dot4c_i32_i8_e32 v2, v0, v1
+; GFX10-NEXT: v_mov_b32_e32 v0, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg <2 x half> %a
%cast.neg.a = bitcast <2 x half> %neg.a to i32
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; GFX908-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
- ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX90A-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
- ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.3:
; GFX908-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX90A-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; GFX90A-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; GFX90A-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; GFX90A-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX90A-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; GFX90A-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; GFX90A-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; PACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; PACKED-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; PACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; PACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; PACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; PACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED-NEXT: {{ $}}
; PACKED-NEXT: bb.3:
; UNPACKED-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; UNPACKED-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; UNPACKED-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; UNPACKED-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED-NEXT: {{ $}}
; UNPACKED-NEXT: bb.3:
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
- ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: s_wqm_b64 exec, exec
; SI-NEXT: v_cvt_i32_f32_e32 v0, v0
-; SI-NEXT: s_movk_i32 s2, 0x3c00
-; SI-NEXT: s_bfe_u32 s3, 0, 0x100000
-; SI-NEXT: s_bfe_u32 s2, s2, 0x100000
-; SI-NEXT: s_lshl_b32 s4, s3, 16
-; SI-NEXT: s_or_b32 s4, s2, s4
-; SI-NEXT: s_lshl_b32 s2, s2, 16
-; SI-NEXT: s_or_b32 s5, s3, s2
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_cbranch_scc0 .LBB6_7
; SI-NEXT: ; %bb.2: ; %.demote0
-; SI-NEXT: s_wqm_b64 s[6:7], s[0:1]
-; SI-NEXT: s_and_b64 exec, exec, s[6:7]
+; SI-NEXT: s_wqm_b64 s[4:5], s[0:1]
+; SI-NEXT: s_and_b64 exec, exec, s[4:5]
; SI-NEXT: .LBB6_3: ; %.continue0
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: s_mov_b64 s[2:3], s[0:1]
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
; SI-NEXT: s_and_b64 s[2:3], s[0:1], vcc
; SI-NEXT: s_xor_b64 s[2:3], s[2:3], -1
-; SI-NEXT: s_and_saveexec_b64 s[6:7], s[2:3]
-; SI-NEXT: s_xor_b64 s[2:3], exec, s[6:7]
+; SI-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
+; SI-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
; SI-NEXT: s_cbranch_execz .LBB6_6
; SI-NEXT: ; %bb.4: ; %.demote1
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_mov_b64 exec, 0
; SI-NEXT: .LBB6_6: ; %.continue1
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
-; SI-NEXT: v_mov_b32_e32 v0, s4
-; SI-NEXT: v_mov_b32_e32 v1, s5
+; SI-NEXT: v_mov_b32_e32 v0, 0x3c00
+; SI-NEXT: v_bfrev_b32_e32 v1, 60
; SI-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB6_7:
; SI-NEXT: s_mov_b64 s[0:1], exec
; SI-NEXT: s_wqm_b64 exec, exec
; SI-NEXT: v_cvt_i32_f32_e32 v0, v0
-; SI-NEXT: s_movk_i32 s2, 0x3c00
-; SI-NEXT: s_bfe_u32 s3, 0, 0x100000
-; SI-NEXT: s_bfe_u32 s2, s2, 0x100000
-; SI-NEXT: s_lshl_b32 s4, s3, 16
-; SI-NEXT: s_or_b32 s6, s2, s4
-; SI-NEXT: s_lshl_b32 s2, s2, 16
-; SI-NEXT: s_or_b32 s7, s3, s2
; SI-NEXT: s_mov_b32 s4, 0
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_cbranch_scc0 .LBB7_9
; SI-NEXT: ; %bb.2: ; %.demote0
-; SI-NEXT: s_wqm_b64 s[8:9], s[0:1]
-; SI-NEXT: s_and_b64 exec, exec, s[8:9]
+; SI-NEXT: s_wqm_b64 s[6:7], s[0:1]
+; SI-NEXT: s_and_b64 exec, exec, s[6:7]
; SI-NEXT: .LBB7_3: ; %.continue0.preheader
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: v_cmp_eq_f32_e32 vcc, 0, v2
; SI-NEXT: s_and_b64 s[4:5], s[0:1], vcc
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; SI-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
-; SI-NEXT: s_xor_b64 s[4:5], exec, s[8:9]
+; SI-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
+; SI-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
; SI-NEXT: s_cbranch_execz .LBB7_4
; SI-NEXT: ; %bb.6: ; %.demote1
; SI-NEXT: ; in Loop: Header=BB7_5 Depth=1
; SI-NEXT: s_cbranch_scc0 .LBB7_9
; SI-NEXT: ; %bb.7: ; %.demote1
; SI-NEXT: ; in Loop: Header=BB7_5 Depth=1
-; SI-NEXT: s_wqm_b64 s[8:9], s[0:1]
-; SI-NEXT: s_and_b64 exec, exec, s[8:9]
+; SI-NEXT: s_wqm_b64 s[6:7], s[0:1]
+; SI-NEXT: s_and_b64 exec, exec, s[6:7]
; SI-NEXT: s_branch .LBB7_4
; SI-NEXT: .LBB7_8: ; %.return
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: s_and_b64 exec, exec, s[0:1]
-; SI-NEXT: v_mov_b32_e32 v0, s6
-; SI-NEXT: v_mov_b32_e32 v1, s7
+; SI-NEXT: v_mov_b32_e32 v0, 0x3c00
+; SI-NEXT: v_bfrev_b32_e32 v1, 60
; SI-NEXT: exp mrt0 v0, v0, v1, v1 done compr vm
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB7_9:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX10PLUS-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%result = mul i16 %num, %den
ret i16 %result
; GCN-LABEL: v_orn2_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v1, -1, v1
+; GCN-NEXT: v_not_b32_e32 v1, v1
; GCN-NEXT: v_or_b32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%not.src1 = xor i32 %src1, -1
define amdgpu_ps float @v_orn2_i32_sv(i32 inreg %src0, i32 %src1) {
; GCN-LABEL: v_orn2_i32_sv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
+; GCN-NEXT: v_not_b32_e32 v0, v0
; GCN-NEXT: v_or_b32_e32 v0, s2, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_orn2_i32_sv:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX10PLUS-NEXT: v_not_b32_e32 v0, v0
; GFX10PLUS-NEXT: v_or_b32_e32 v0, s2, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
%not.src1 = xor i32 %src1, -1
; GCN-LABEL: v_orn2_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v2, -1, v2
-; GCN-NEXT: v_xor_b32_e32 v3, -1, v3
+; GCN-NEXT: v_not_b32_e32 v2, v2
+; GCN-NEXT: v_not_b32_e32 v3, v3
; GCN-NEXT: v_or_b32_e32 v0, v0, v2
; GCN-NEXT: v_or_b32_e32 v1, v1, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2
-; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -1, v3
+; GFX10PLUS-NEXT: v_not_b32_e32 v2, v2
+; GFX10PLUS-NEXT: v_not_b32_e32 v3, v3
; GFX10PLUS-NEXT: v_or_b32_e32 v0, v0, v2
; GFX10PLUS-NEXT: v_or_b32_e32 v1, v1, v3
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
define amdgpu_ps <2 x float> @v_orn2_i64_sv(i64 inreg %src0, i64 %src1) {
; GCN-LABEL: v_orn2_i64_sv:
; GCN: ; %bb.0:
-; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
-; GCN-NEXT: v_xor_b32_e32 v1, -1, v1
+; GCN-NEXT: v_not_b32_e32 v0, v0
+; GCN-NEXT: v_not_b32_e32 v1, v1
; GCN-NEXT: v_or_b32_e32 v0, s2, v0
; GCN-NEXT: v_or_b32_e32 v1, s3, v1
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_orn2_i64_sv:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX10PLUS-NEXT: v_not_b32_e32 v0, v0
+; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
; GFX10PLUS-NEXT: v_or_b32_e32 v0, s2, v0
; GFX10PLUS-NEXT: v_or_b32_e32 v1, s3, v1
; GFX10PLUS-NEXT: ; return to shader part epilog
; GCN: ; %bb.0:
; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
; GCN-NEXT: v_or_b32_e32 v0, s2, v0
-; GCN-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_orn2_i16_sv:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX10PLUS-NEXT: v_or_b32_e32 v0, s2, v0
-; GFX10PLUS-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
%not.src1 = xor i16 %src1, -1
%or = or i16 %src0, %not.src1
; GCN: ; %bb.0:
; GCN-NEXT: s_xor_b32 s0, s2, -1
; GCN-NEXT: v_or_b32_e32 v0, s0, v0
-; GCN-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_orn2_i16_vs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_xor_b32 s0, s2, -1
; GFX10PLUS-NEXT: v_or_b32_e32 v0, s0, v0
-; GFX10PLUS-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
%not.src1 = xor i16 %src1, -1
%or = or i16 %src0, %not.src1
;
; GFX8-LABEL: s_saddsat_i7:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s2, 9, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_sext_i32_i16 s3, s0
-; GFX8-NEXT: s_sext_i32_i16 s4, 0
-; GFX8-NEXT: s_max_i32 s5, s3, s4
-; GFX8-NEXT: s_min_i32 s3, s3, s4
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
-; GFX8-NEXT: s_sub_i32 s3, 0xffff8000, s3
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
+; GFX8-NEXT: s_lshl_b32 s0, s0, 9
+; GFX8-NEXT: s_sext_i32_i16 s2, s0
+; GFX8-NEXT: s_sext_i32_i16 s3, 0
+; GFX8-NEXT: s_max_i32 s4, s2, s3
+; GFX8-NEXT: s_min_i32 s2, s2, s3
+; GFX8-NEXT: s_lshl_b32 s1, s1, 9
+; GFX8-NEXT: s_sub_i32 s2, 0xffff8000, s2
+; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
-; GFX8-NEXT: s_max_i32 s1, s3, s1
+; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
+; GFX8-NEXT: s_max_i32 s1, s2, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sext_i32_i16 s3, s5
-; GFX8-NEXT: s_min_i32 s1, s1, s3
+; GFX8-NEXT: s_sext_i32_i16 s2, s4
+; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: s_sext_i32_i16 s0, s0
-; GFX8-NEXT: s_ashr_i32 s0, s0, s2
+; GFX8-NEXT: s_ashr_i32 s0, s0, 9
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_saddsat_i7:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s2, 9, 0x100000
-; GFX9-NEXT: s_lshl_b32 s1, s1, s2
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
+; GFX9-NEXT: s_lshl_b32 s1, s1, 9
+; GFX9-NEXT: s_lshl_b32 s0, s0, 9
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_add_i16 v0, s0, v0 clamp
; GFX9-NEXT: v_ashrrev_i16_e32 v0, 9, v0
;
; GFX10PLUS-LABEL: s_saddsat_i7:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s2, 9, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s2
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 9
+; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 9
; GFX10PLUS-NEXT: v_add_nc_i16 v0, s0, s1 clamp
; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 9, v0
; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v0
;
; GFX8-LABEL: s_saddsat_i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_sext_i32_i16 s3, s0
-; GFX8-NEXT: s_sext_i32_i16 s4, 0
-; GFX8-NEXT: s_max_i32 s5, s3, s4
-; GFX8-NEXT: s_min_i32 s3, s3, s4
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
-; GFX8-NEXT: s_sub_i32 s3, 0xffff8000, s3
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
+; GFX8-NEXT: s_lshl_b32 s0, s0, 8
+; GFX8-NEXT: s_sext_i32_i16 s2, s0
+; GFX8-NEXT: s_sext_i32_i16 s3, 0
+; GFX8-NEXT: s_max_i32 s4, s2, s3
+; GFX8-NEXT: s_min_i32 s2, s2, s3
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
+; GFX8-NEXT: s_sub_i32 s2, 0xffff8000, s2
+; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
-; GFX8-NEXT: s_max_i32 s1, s3, s1
+; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
+; GFX8-NEXT: s_max_i32 s1, s2, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sext_i32_i16 s3, s5
-; GFX8-NEXT: s_min_i32 s1, s1, s3
+; GFX8-NEXT: s_sext_i32_i16 s2, s4
+; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
; GFX8-NEXT: s_sext_i32_i16 s0, s0
-; GFX8-NEXT: s_ashr_i32 s0, s0, s2
+; GFX8-NEXT: s_ashr_i32 s0, s0, 8
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_saddsat_i8:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s2, 8, 0x100000
-; GFX9-NEXT: s_lshl_b32 s1, s1, s2
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
+; GFX9-NEXT: s_lshl_b32 s1, s1, 8
+; GFX9-NEXT: s_lshl_b32 s0, s0, 8
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_add_i16 v0, s0, v0 clamp
; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v0
;
; GFX10PLUS-LABEL: s_saddsat_i8:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s2, 8, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s2
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 8
+; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 8
; GFX10PLUS-NEXT: v_add_nc_i16 v0, s0, s1 clamp
; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 8, v0
; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v3, v1, s4
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_add_i16 v0, v0, v1 clamp
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: s_movk_i32 s4, 0xff
-; GFX10-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
-; GFX10-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
+; GFX10-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_add_i16 v0, v0, v1 clamp
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_add_i16 v0, v0, v1 clamp
;
; GFX8-LABEL: s_saddsat_v2i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s4, 8, 0x100000
; GFX8-NEXT: s_lshr_b32 s2, s0, 8
-; GFX8-NEXT: s_lshl_b32 s0, s0, s4
-; GFX8-NEXT: s_sext_i32_i16 s5, s0
-; GFX8-NEXT: s_sext_i32_i16 s6, 0
-; GFX8-NEXT: s_max_i32 s7, s5, s6
-; GFX8-NEXT: s_min_i32 s5, s5, s6
+; GFX8-NEXT: s_lshl_b32 s0, s0, 8
+; GFX8-NEXT: s_sext_i32_i16 s4, s0
+; GFX8-NEXT: s_sext_i32_i16 s5, 0
+; GFX8-NEXT: s_max_i32 s6, s4, s5
+; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_lshr_b32 s3, s1, 8
-; GFX8-NEXT: s_lshl_b32 s1, s1, s4
-; GFX8-NEXT: s_sub_i32 s5, 0xffff8000, s5
-; GFX8-NEXT: s_sext_i32_i16 s5, s5
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
+; GFX8-NEXT: s_sub_i32 s4, 0xffff8000, s4
+; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s7, 0x7fff, s7
-; GFX8-NEXT: s_max_i32 s1, s5, s1
+; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
+; GFX8-NEXT: s_max_i32 s1, s4, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sext_i32_i16 s5, s7
-; GFX8-NEXT: s_min_i32 s1, s1, s5
+; GFX8-NEXT: s_sext_i32_i16 s4, s6
+; GFX8-NEXT: s_min_i32 s1, s1, s4
; GFX8-NEXT: s_add_i32 s0, s0, s1
-; GFX8-NEXT: s_lshl_b32 s1, s2, s4
-; GFX8-NEXT: s_lshl_b32 s2, s3, s4
+; GFX8-NEXT: s_lshl_b32 s1, s2, 8
+; GFX8-NEXT: s_lshl_b32 s2, s3, 8
; GFX8-NEXT: s_sext_i32_i16 s3, s1
-; GFX8-NEXT: s_max_i32 s5, s3, s6
-; GFX8-NEXT: s_min_i32 s3, s3, s6
+; GFX8-NEXT: s_max_i32 s4, s3, s5
+; GFX8-NEXT: s_min_i32 s3, s3, s5
; GFX8-NEXT: s_sub_i32 s3, 0xffff8000, s3
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
+; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_max_i32 s2, s3, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_sext_i32_i16 s3, s5
+; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_add_i32 s1, s1, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s0, s0
-; GFX8-NEXT: s_ashr_i32 s1, s1, s4
-; GFX8-NEXT: s_ashr_i32 s0, s0, s4
+; GFX8-NEXT: s_ashr_i32 s1, s1, 8
+; GFX8-NEXT: s_ashr_i32 s0, s0, 8
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
-; GFX8-NEXT: s_lshl_b32 s1, s1, s4
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_saddsat_v4i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; GFX9-NEXT: v_perm_b32 v2, v2, v0, s4
+; GFX9-NEXT: v_and_b32_e32 v6, 0xffff, v0
; GFX9-NEXT: v_alignbit_b32 v0, v3, v0, 16
-; GFX9-NEXT: v_perm_b32 v3, v4, v1, s4
+; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v1
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1
+; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v6
+; GFX9-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GFX9-NEXT: v_alignbit_b32 v1, v5, v1, 16
; GFX9-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
-; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1
-; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; GFX10-NEXT: v_perm_b32 v2, v2, v0, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
+; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v1
+; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX10-NEXT: v_alignbit_b32 v0, v3, v0, 16
-; GFX10-NEXT: v_perm_b32 v3, v4, v1, 0x5040100
-; GFX10-NEXT: v_alignbit_b32 v1, v5, v1, 16
+; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 24
-; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
+; GFX10-NEXT: v_lshl_or_b32 v3, v5, 16, v6
+; GFX10-NEXT: v_alignbit_b32 v1, v7, v1, 16
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
+; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_add_i16 v2, v2, v3 clamp
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v4, 24, v0
-; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; GFX11-NEXT: v_perm_b32 v2, v2, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v3, v3, v1, 0x5040100
-; GFX11-NEXT: v_alignbit_b32 v0, v4, v0, 16
-; GFX11-NEXT: v_alignbit_b32 v1, v5, v1, 16
+; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v1
+; GFX11-NEXT: v_lshrrev_b32_e32 v6, 24, v0
+; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
+; GFX11-NEXT: v_lshl_or_b32 v2, v2, 16, v4
+; GFX11-NEXT: v_lshl_or_b32 v3, v3, 16, v5
+; GFX11-NEXT: v_alignbit_b32 v0, v6, v0, 16
+; GFX11-NEXT: v_alignbit_b32 v1, v7, v1, 16
; GFX11-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
;
; GFX8-LABEL: s_saddsat_v4i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s8, 8, 0x100000
; GFX8-NEXT: s_lshr_b32 s2, s0, 8
; GFX8-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s0, 24
-; GFX8-NEXT: s_lshl_b32 s0, s0, s8
-; GFX8-NEXT: s_sext_i32_i16 s9, s0
-; GFX8-NEXT: s_sext_i32_i16 s10, 0
-; GFX8-NEXT: s_max_i32 s11, s9, s10
-; GFX8-NEXT: s_min_i32 s9, s9, s10
+; GFX8-NEXT: s_lshl_b32 s0, s0, 8
+; GFX8-NEXT: s_sext_i32_i16 s8, s0
+; GFX8-NEXT: s_sext_i32_i16 s9, 0
+; GFX8-NEXT: s_max_i32 s10, s8, s9
+; GFX8-NEXT: s_min_i32 s8, s8, s9
; GFX8-NEXT: s_lshr_b32 s5, s1, 8
; GFX8-NEXT: s_lshr_b32 s6, s1, 16
; GFX8-NEXT: s_lshr_b32 s7, s1, 24
-; GFX8-NEXT: s_lshl_b32 s1, s1, s8
-; GFX8-NEXT: s_sub_i32 s9, 0xffff8000, s9
-; GFX8-NEXT: s_sext_i32_i16 s9, s9
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
+; GFX8-NEXT: s_sub_i32 s8, 0xffff8000, s8
+; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s11, 0x7fff, s11
-; GFX8-NEXT: s_max_i32 s1, s9, s1
+; GFX8-NEXT: s_sub_i32 s10, 0x7fff, s10
+; GFX8-NEXT: s_max_i32 s1, s8, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sext_i32_i16 s9, s11
-; GFX8-NEXT: s_min_i32 s1, s1, s9
+; GFX8-NEXT: s_sext_i32_i16 s8, s10
+; GFX8-NEXT: s_min_i32 s1, s1, s8
; GFX8-NEXT: s_add_i32 s0, s0, s1
-; GFX8-NEXT: s_lshl_b32 s1, s2, s8
-; GFX8-NEXT: s_lshl_b32 s2, s5, s8
+; GFX8-NEXT: s_lshl_b32 s1, s2, 8
+; GFX8-NEXT: s_lshl_b32 s2, s5, 8
; GFX8-NEXT: s_sext_i32_i16 s5, s1
-; GFX8-NEXT: s_max_i32 s9, s5, s10
-; GFX8-NEXT: s_min_i32 s5, s5, s10
+; GFX8-NEXT: s_max_i32 s8, s5, s9
+; GFX8-NEXT: s_min_i32 s5, s5, s9
; GFX8-NEXT: s_sub_i32 s5, 0xffff8000, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
+; GFX8-NEXT: s_sub_i32 s8, 0x7fff, s8
; GFX8-NEXT: s_max_i32 s2, s5, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_sext_i32_i16 s5, s9
+; GFX8-NEXT: s_sext_i32_i16 s5, s8
; GFX8-NEXT: s_min_i32 s2, s2, s5
; GFX8-NEXT: s_add_i32 s1, s1, s2
-; GFX8-NEXT: s_lshl_b32 s2, s3, s8
+; GFX8-NEXT: s_lshl_b32 s2, s3, 8
; GFX8-NEXT: s_sext_i32_i16 s5, s2
-; GFX8-NEXT: s_lshl_b32 s3, s6, s8
-; GFX8-NEXT: s_max_i32 s6, s5, s10
-; GFX8-NEXT: s_min_i32 s5, s5, s10
+; GFX8-NEXT: s_lshl_b32 s3, s6, 8
+; GFX8-NEXT: s_max_i32 s6, s5, s9
+; GFX8-NEXT: s_min_i32 s5, s5, s9
; GFX8-NEXT: s_sub_i32 s5, 0xffff8000, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s6
; GFX8-NEXT: s_min_i32 s3, s3, s5
; GFX8-NEXT: s_add_i32 s2, s2, s3
-; GFX8-NEXT: s_lshl_b32 s3, s4, s8
+; GFX8-NEXT: s_lshl_b32 s3, s4, 8
; GFX8-NEXT: s_sext_i32_i16 s5, s3
-; GFX8-NEXT: s_max_i32 s6, s5, s10
-; GFX8-NEXT: s_min_i32 s5, s5, s10
-; GFX8-NEXT: s_lshl_b32 s4, s7, s8
+; GFX8-NEXT: s_max_i32 s6, s5, s9
+; GFX8-NEXT: s_min_i32 s5, s5, s9
+; GFX8-NEXT: s_lshl_b32 s4, s7, 8
; GFX8-NEXT: s_sub_i32 s5, 0xffff8000, s5
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_max_i32 s4, s5, s4
; GFX8-NEXT: s_sext_i32_i16 s0, s0
-; GFX8-NEXT: s_ashr_i32 s1, s1, s8
+; GFX8-NEXT: s_ashr_i32 s1, s1, 8
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s6
-; GFX8-NEXT: s_ashr_i32 s0, s0, s8
+; GFX8-NEXT: s_ashr_i32 s0, s0, 8
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
-; GFX8-NEXT: s_ashr_i32 s2, s2, s8
+; GFX8-NEXT: s_ashr_i32 s2, s2, 8
; GFX8-NEXT: s_add_i32 s3, s3, s4
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: s_and_b32 s1, s2, 0xff
-; GFX8-NEXT: s_ashr_i32 s3, s3, s8
+; GFX8-NEXT: s_ashr_i32 s3, s3, 8
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: s_and_b32 s1, s3, 0xff
; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_min_i32 s1, s1, s3
; GFX8-NEXT: s_add_i32 s2, s2, s1
-; GFX8-NEXT: s_bfe_u32 s1, s2, 0x100000
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_add_i32 s5, s5, s2
-; GFX8-NEXT: s_bfe_u32 s2, s4, 0x100000
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s4
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s2, s5, 0x100000
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s5
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s1, s1, s2
; GFX8-NEXT: ; return to shader part epilog
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_min_i32 s3, s3, s4
; GFX8-NEXT: s_add_i32 s8, s8, s3
-; GFX8-NEXT: s_bfe_u32 s3, s6, 0x100000
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s6
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
; GFX8-NEXT: s_or_b32 s0, s0, s3
-; GFX8-NEXT: s_bfe_u32 s3, s7, 0x100000
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s7
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
; GFX8-NEXT: s_or_b32 s1, s1, s3
-; GFX8-NEXT: s_bfe_u32 s3, s8, 0x100000
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s8
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
; GFX8-NEXT: s_or_b32 s2, s2, s3
; GFX8-NEXT: ; return to shader part epilog
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_add_i32 s11, s11, s4
-; GFX8-NEXT: s_bfe_u32 s4, s8, 0x100000
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s8
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_or_b32 s0, s0, s4
-; GFX8-NEXT: s_bfe_u32 s4, s9, 0x100000
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s9
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_or_b32 s1, s1, s4
-; GFX8-NEXT: s_bfe_u32 s4, s10, 0x100000
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s10
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_or_b32 s2, s2, s4
-; GFX8-NEXT: s_bfe_u32 s4, s11, 0x100000
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s11
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_or_b32 s3, s3, s4
; GFX8-NEXT: ; return to shader part epilog
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s5, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GFX9-NEXT: v_subrev_u32_e32 v4, s5, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; GFX9-NEXT: s_xor_b32 s4, s11, s7
+; GFX9-NEXT: v_subrev_u32_e32 v0, s6, v0
; GFX9-NEXT: v_xor_b32_e32 v2, s10, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
; GFX9-NEXT: v_xor_b32_e32 v1, s4, v1
-; GFX9-NEXT: v_xor_b32_e32 v3, s11, v3
-; GFX9-NEXT: v_subrev_u32_e32 v0, s6, v0
; GFX9-NEXT: v_subrev_u32_e32 v2, s10, v2
; GFX9-NEXT: v_subrev_u32_e32 v1, s4, v1
+; GFX9-NEXT: v_xor_b32_e32 v3, s11, v3
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_subrev_u32_e32 v3, s11, v3
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v3, v2, s4
+; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: global_store_dword v2, v1, s[2:3]
; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s2
; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s1
; GFX10-NEXT: s_sub_i32 s6, 0, s2
-; GFX10-NEXT: s_sub_i32 s7, 0, s1
; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1
; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX10-NEXT: v_mul_lo_u32 v2, s6, v0
-; GFX10-NEXT: v_mul_lo_u32 v3, s7, v1
+; GFX10-NEXT: s_sub_i32 s6, 0, s1
+; GFX10-NEXT: v_mul_lo_u32 v3, s6, v1
; GFX10-NEXT: s_sext_i32_i16 s6, s0
; GFX10-NEXT: s_bfe_i32 s0, s0, 0x100010
; GFX10-NEXT: s_ashr_i32 s9, s6, 31
; GFX10-NEXT: s_ashr_i32 s10, s0, 31
+; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX10-NEXT: s_add_i32 s6, s6, s9
; GFX10-NEXT: s_add_i32 s0, s0, s10
-; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3
; GFX10-NEXT: s_xor_b32 s6, s6, s9
; GFX10-NEXT: s_xor_b32 s0, s0, s10
; GFX10-NEXT: v_mul_hi_u32 v0, s6, v0
; GFX10-NEXT: v_mul_hi_u32 v1, s0, v1
; GFX10-NEXT: v_mul_lo_u32 v2, v0, s2
-; GFX10-NEXT: v_mul_lo_u32 v3, v1, s1
; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v1
+; GFX10-NEXT: v_mul_lo_u32 v3, v1, s1
+; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v1
; GFX10-NEXT: v_sub_nc_u32_e32 v2, s6, v2
-; GFX10-NEXT: v_sub_nc_u32_e32 v3, s0, v3
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s2, v2
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v3
-; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v2
-; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v1
-; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0
-; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v2
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s2, v2
-; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s1, v3
+; GFX10-NEXT: v_sub_nc_u32_e32 v3, s0, v3
+; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s2, v2
+; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT: v_cmp_le_u32_e64 s0, s1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT: v_subrev_nc_u32_e32 v4, s1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0
+; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v4, s0
+; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT: v_subrev_nc_u32_e32 v4, s2, v2
+; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v1
+; GFX10-NEXT: v_cmp_le_u32_e64 s0, s1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
+; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo
; GFX10-NEXT: s_xor_b32 s1, s9, s3
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT: s_xor_b32 s0, s10, s8
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0
; GFX10-NEXT: v_xor_b32_e32 v0, s1, v0
-; GFX10-NEXT: v_xor_b32_e32 v1, s0, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v5, s0
; GFX10-NEXT: v_xor_b32_e32 v2, s9, v2
-; GFX10-NEXT: v_xor_b32_e32 v3, s10, v3
+; GFX10-NEXT: s_xor_b32 s0, s10, s8
+; GFX10-NEXT: v_xor_b32_e32 v1, s0, v1
; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s1, v0
-; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s0, v1
+; GFX10-NEXT: v_xor_b32_e32 v3, s10, v3
; GFX10-NEXT: v_subrev_nc_u32_e32 v2, s9, v2
+; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s0, v1
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s10, v3
-; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_store_dword v1, v0, s[4:5]
; GFX10-NEXT: global_store_dword v1, v2, s[6:7]
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s6, 0
-; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v0
+; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GCN-NEXT: v_cndmask_b32_e64 v4, v0, 0, vcc
; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v0, s6 src0_sel:WORD_1 src1_sel:DWORD
+; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v1
; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GCN-NEXT: v_cndmask_b32_e64 v4, v0, 0, vcc
; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[4:5]
-; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v1
-; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v1, s6 src0_sel:WORD_1 src1_sel:DWORD
; GCN-NEXT: v_cndmask_b32_e64 v2, v1, 0, vcc
+; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v1, s6 src0_sel:WORD_1 src1_sel:DWORD
; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, s[4:5]
-; GCN-NEXT: s_mov_b32 s4, 0x5040100
-; GCN-NEXT: v_perm_b32 v0, v0, v4, s4
-; GCN-NEXT: v_perm_b32 v1, v1, v2, s4
+; GCN-NEXT: v_and_b32_e32 v3, 0xffff, v4
+; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GCN-NEXT: v_lshl_or_b32 v0, v0, 16, v3
+; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
%fcmp = fcmp olt <4 x half> %a, zeroinitializer
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s6, 0
-; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v0
+; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GCN-NEXT: v_cndmask_b32_e64 v8, v0, 0, vcc
; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v0, s6 src0_sel:WORD_1 src1_sel:DWORD
+; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v1
; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v1
-; GCN-NEXT: v_cndmask_b32_e64 v8, v0, 0, vcc
; GCN-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5]
-; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v1
+; GCN-NEXT: v_cndmask_b32_e64 v4, v1, 0, vcc
; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v1, s6 src0_sel:WORD_1 src1_sel:DWORD
+; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v2
; GCN-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GCN-NEXT: v_cndmask_b32_e64 v4, v1, 0, vcc
; GCN-NEXT: v_cndmask_b32_e64 v1, v5, 0, s[4:5]
-; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v2
+; GCN-NEXT: v_cndmask_b32_e64 v5, v2, 0, vcc
; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v2, s6 src0_sel:WORD_1 src1_sel:DWORD
+; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v3
+; GCN-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GCN-NEXT: v_cndmask_b32_e64 v5, v2, 0, vcc
; GCN-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
-; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v3
-; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v3, s6 src0_sel:WORD_1 src1_sel:DWORD
; GCN-NEXT: v_cndmask_b32_e64 v6, v3, 0, vcc
+; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v3, s6 src0_sel:WORD_1 src1_sel:DWORD
+; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v4
+; GCN-NEXT: v_and_b32_e32 v4, 0xffff, v5
; GCN-NEXT: v_cndmask_b32_e64 v3, v7, 0, s[4:5]
-; GCN-NEXT: s_mov_b32 s4, 0x5040100
-; GCN-NEXT: v_perm_b32 v0, v0, v8, s4
-; GCN-NEXT: v_perm_b32 v1, v1, v4, s4
-; GCN-NEXT: v_perm_b32 v2, v2, v5, s4
-; GCN-NEXT: v_perm_b32 v3, v3, v6, s4
+; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v8
+; GCN-NEXT: v_lshl_or_b32 v2, v2, 16, v4
+; GCN-NEXT: v_and_b32_e32 v4, 0xffff, v6
+; GCN-NEXT: v_lshl_or_b32 v0, v0, 16, v7
+; GCN-NEXT: v_lshl_or_b32 v3, v3, 16, v4
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
%fcmp = fcmp olt <8 x half> %a, zeroinitializer
;
; GFX8-LABEL: s_sext_inreg_i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s1, 3, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s1
+; GFX8-NEXT: s_lshl_b32 s0, s0, 3
; GFX8-NEXT: s_sext_i32_i8 s0, s0
; GFX8-NEXT: s_ashr_i32 s0, s0, 3
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_sext_inreg_i8:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s1, 3, 0x100000
-; GFX9-NEXT: s_lshl_b32 s0, s0, s1
+; GFX9-NEXT: s_lshl_b32 s0, s0, 3
; GFX9-NEXT: s_sext_i32_i8 s0, s0
; GFX9-NEXT: s_ashr_i32 s0, s0, 3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_i8:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s1, 3, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 3
; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 3
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_sext_inreg_i8_6:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s1, 6, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s1
+; GFX8-NEXT: s_lshl_b32 s0, s0, 6
; GFX8-NEXT: s_sext_i32_i8 s0, s0
; GFX8-NEXT: s_ashr_i32 s0, s0, 6
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_sext_inreg_i8_6:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s1, 6, 0x100000
-; GFX9-NEXT: s_lshl_b32 s0, s0, s1
+; GFX9-NEXT: s_lshl_b32 s0, s0, 6
; GFX9-NEXT: s_sext_i32_i8 s0, s0
; GFX9-NEXT: s_ashr_i32 s0, s0, 6
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_i8_6:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s1, 6, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 6
; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 6
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_sext_inreg_i16_9:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s1, 9, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s1
+; GFX8-NEXT: s_lshl_b32 s0, s0, 9
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s0, s0, 9
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_sext_inreg_i16_9:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s1, 9, 0x100000
-; GFX9-NEXT: s_lshl_b32 s0, s0, s1
+; GFX9-NEXT: s_lshl_b32 s0, s0, 9
; GFX9-NEXT: s_sext_i32_i16 s0, s0
; GFX9-NEXT: s_ashr_i32 s0, s0, 9
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_i16_9:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s1, 9, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 9
; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 9
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_sext_inreg_i16_15:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s1, 15, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s1
+; GFX8-NEXT: s_lshl_b32 s0, s0, 15
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s0, s0, 15
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_sext_inreg_i16_15:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s1, 15, 0x100000
-; GFX9-NEXT: s_lshl_b32 s0, s0, s1
+; GFX9-NEXT: s_lshl_b32 s0, s0, 15
; GFX9-NEXT: s_sext_i32_i16 s0, s0
; GFX9-NEXT: s_ashr_i32 s0, s0, 15
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_i16_15:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s1, 15, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15
; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 15
; GFX10PLUS-NEXT: ; return to shader part epilog
; GFX8-LABEL: s_sext_inreg_v2i16_11:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
-; GFX8-NEXT: s_bfe_u32 s2, 11, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
+; GFX8-NEXT: s_lshl_b32 s0, s0, 11
+; GFX8-NEXT: s_lshl_b32 s1, s1, 11
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_ashr_i32 s0, s0, 11
;
; GFX8-LABEL: s_sext_inreg_v4i16_14:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s0, -1, 0x100000
-; GFX8-NEXT: s_mov_b32 s1, s0
+; GFX8-NEXT: s_mov_b32 s0, 0xffff
+; GFX8-NEXT: s_mov_b32 s1, 0xffff
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_sext_inreg_v4i16_14:
; GFX8-LABEL: v_sext_inreg_v8i16_11:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: s_bfe_u32 s4, -1, 0x100000
-; GFX8-NEXT: v_mov_b32_e32 v0, s4
-; GFX8-NEXT: v_mov_b32_e32 v1, s4
-; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: v_mov_b32_e32 v3, s4
+; GFX8-NEXT: v_mov_b32_e32 v0, 0xffff
+; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff
+; GFX8-NEXT: v_mov_b32_e32 v2, 0xffff
+; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_sext_inreg_v8i16_11:
; GFX8-LABEL: s_sext_inreg_v8i16_5:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
-; GFX8-NEXT: s_bfe_u32 s8, 10, 0x100000
-; GFX8-NEXT: s_lshl_b32 s4, s4, s8
+; GFX8-NEXT: s_lshl_b32 s4, s4, 10
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
-; GFX8-NEXT: s_lshl_b32 s0, s0, s8
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
-; GFX8-NEXT: s_lshl_b32 s5, s5, s8
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_lshl_b32 s0, s0, 10
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX8-NEXT: s_lshl_b32 s5, s5, 10
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_lshr_b32 s6, s2, 16
-; GFX8-NEXT: s_lshl_b32 s1, s1, s8
+; GFX8-NEXT: s_lshl_b32 s1, s1, 10
; GFX8-NEXT: s_or_b32 s0, s0, s4
-; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000
-; GFX8-NEXT: s_lshl_b32 s6, s6, s8
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
+; GFX8-NEXT: s_lshl_b32 s6, s6, 10
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_lshr_b32 s7, s3, 16
-; GFX8-NEXT: s_lshl_b32 s2, s2, s8
+; GFX8-NEXT: s_lshl_b32 s2, s2, 10
; GFX8-NEXT: s_or_b32 s1, s1, s4
-; GFX8-NEXT: s_bfe_u32 s4, s6, 0x100000
-; GFX8-NEXT: s_lshl_b32 s7, s7, s8
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
+; GFX8-NEXT: s_lshl_b32 s7, s7, 10
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
-; GFX8-NEXT: s_lshl_b32 s3, s3, s8
+; GFX8-NEXT: s_lshl_b32 s3, s3, 10
; GFX8-NEXT: s_or_b32 s2, s2, s4
-; GFX8-NEXT: s_bfe_u32 s4, s7, 0x100000
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_or_b32 s3, s3, s4
; GFX8-NEXT: ; return to shader part epilog
}
define amdgpu_ps i65 @s_sext_inreg_i65_33(i65 inreg %value) {
-; GFX6-LABEL: s_sext_inreg_i65_33:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_mov_b32 s0, 0
-; GFX6-NEXT: s_mov_b32 s1, 0
-; GFX6-NEXT: s_mov_b32 s2, 0
-; GFX6-NEXT: ; return to shader part epilog
-;
-; GFX8-LABEL: s_sext_inreg_i65_33:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s0, 1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s1, 2, 0x100000
-; GFX8-NEXT: s_lshr_b32 s0, 0, s0
-; GFX8-NEXT: s_lshr_b32 s1, 0, s1
-; GFX8-NEXT: s_bfe_u32 s2, 3, 0x100000
-; GFX8-NEXT: s_and_b32 s0, s0, 1
-; GFX8-NEXT: s_and_b32 s1, s1, 1
-; GFX8-NEXT: s_lshr_b32 s2, 0, s2
-; GFX8-NEXT: s_lshl_b32 s0, s0, 17
-; GFX8-NEXT: s_lshl_b32 s1, s1, 18
-; GFX8-NEXT: s_bfe_u32 s3, 4, 0x100000
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s2, 1
-; GFX8-NEXT: s_lshr_b32 s3, 0, s3
-; GFX8-NEXT: s_lshl_b32 s1, s1, 19
-; GFX8-NEXT: s_bfe_u32 s4, 5, 0x100000
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s3, 1
-; GFX8-NEXT: s_lshr_b32 s4, 0, s4
-; GFX8-NEXT: s_lshl_b32 s1, s1, 20
-; GFX8-NEXT: s_bfe_u32 s5, 6, 0x100000
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s4, 1
-; GFX8-NEXT: s_lshr_b32 s5, 0, s5
-; GFX8-NEXT: s_lshl_b32 s1, s1, 21
-; GFX8-NEXT: s_bfe_u32 s6, 7, 0x100000
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s5, 1
-; GFX8-NEXT: s_lshr_b32 s6, 0, s6
-; GFX8-NEXT: s_lshl_b32 s1, s1, 22
-; GFX8-NEXT: s_bfe_u32 s7, 8, 0x100000
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s6, 1
-; GFX8-NEXT: s_lshr_b32 s7, 0, s7
-; GFX8-NEXT: s_lshl_b32 s1, s1, 23
-; GFX8-NEXT: s_bfe_u32 s8, 9, 0x100000
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s7, 1
-; GFX8-NEXT: s_lshr_b32 s8, 0, s8
-; GFX8-NEXT: s_lshl_b32 s1, s1, 24
-; GFX8-NEXT: s_bfe_u32 s9, 10, 0x100000
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s8, 1
-; GFX8-NEXT: s_lshr_b32 s9, 0, s9
-; GFX8-NEXT: s_lshl_b32 s1, s1, 25
-; GFX8-NEXT: s_bfe_u32 s10, 11, 0x100000
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s9, 1
-; GFX8-NEXT: s_lshr_b32 s10, 0, s10
-; GFX8-NEXT: s_lshl_b32 s1, s1, 26
-; GFX8-NEXT: s_bfe_u32 s11, 12, 0x100000
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s10, 1
-; GFX8-NEXT: s_lshr_b32 s11, 0, s11
-; GFX8-NEXT: s_lshl_b32 s1, s1, 27
-; GFX8-NEXT: s_bfe_u32 s12, 13, 0x100000
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s11, 1
-; GFX8-NEXT: s_lshr_b32 s12, 0, s12
-; GFX8-NEXT: s_lshl_b32 s1, s1, 28
-; GFX8-NEXT: s_bfe_u32 s13, 14, 0x100000
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s12, 1
-; GFX8-NEXT: s_lshr_b32 s13, 0, s13
-; GFX8-NEXT: s_lshl_b32 s1, s1, 29
-; GFX8-NEXT: s_bfe_u32 s14, 15, 0x100000
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s13, 1
-; GFX8-NEXT: s_lshr_b32 s14, 0, s14
-; GFX8-NEXT: s_lshl_b32 s1, s1, 30
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_and_b32 s1, s14, 1
-; GFX8-NEXT: s_lshl_b32 s1, s1, 31
-; GFX8-NEXT: s_or_b32 s0, s0, s1
-; GFX8-NEXT: s_mov_b32 s1, s0
-; GFX8-NEXT: s_mov_b32 s2, 0
-; GFX8-NEXT: ; return to shader part epilog
-;
-; GFX9-LABEL: s_sext_inreg_i65_33:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s0, 1, 0x100000
-; GFX9-NEXT: s_bfe_u32 s1, 2, 0x100000
-; GFX9-NEXT: s_lshr_b32 s0, 0, s0
-; GFX9-NEXT: s_lshr_b32 s1, 0, s1
-; GFX9-NEXT: s_bfe_u32 s2, 3, 0x100000
-; GFX9-NEXT: s_and_b32 s0, s0, 1
-; GFX9-NEXT: s_and_b32 s1, s1, 1
-; GFX9-NEXT: s_lshr_b32 s2, 0, s2
-; GFX9-NEXT: s_lshl_b32 s0, s0, 17
-; GFX9-NEXT: s_lshl_b32 s1, s1, 18
-; GFX9-NEXT: s_bfe_u32 s3, 4, 0x100000
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s2, 1
-; GFX9-NEXT: s_lshr_b32 s3, 0, s3
-; GFX9-NEXT: s_lshl_b32 s1, s1, 19
-; GFX9-NEXT: s_bfe_u32 s4, 5, 0x100000
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s3, 1
-; GFX9-NEXT: s_lshr_b32 s4, 0, s4
-; GFX9-NEXT: s_lshl_b32 s1, s1, 20
-; GFX9-NEXT: s_bfe_u32 s5, 6, 0x100000
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s4, 1
-; GFX9-NEXT: s_lshr_b32 s5, 0, s5
-; GFX9-NEXT: s_lshl_b32 s1, s1, 21
-; GFX9-NEXT: s_bfe_u32 s6, 7, 0x100000
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s5, 1
-; GFX9-NEXT: s_lshr_b32 s6, 0, s6
-; GFX9-NEXT: s_lshl_b32 s1, s1, 22
-; GFX9-NEXT: s_bfe_u32 s7, 8, 0x100000
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s6, 1
-; GFX9-NEXT: s_lshr_b32 s7, 0, s7
-; GFX9-NEXT: s_lshl_b32 s1, s1, 23
-; GFX9-NEXT: s_bfe_u32 s8, 9, 0x100000
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s7, 1
-; GFX9-NEXT: s_lshr_b32 s8, 0, s8
-; GFX9-NEXT: s_lshl_b32 s1, s1, 24
-; GFX9-NEXT: s_bfe_u32 s9, 10, 0x100000
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s8, 1
-; GFX9-NEXT: s_lshr_b32 s9, 0, s9
-; GFX9-NEXT: s_lshl_b32 s1, s1, 25
-; GFX9-NEXT: s_bfe_u32 s10, 11, 0x100000
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s9, 1
-; GFX9-NEXT: s_lshr_b32 s10, 0, s10
-; GFX9-NEXT: s_lshl_b32 s1, s1, 26
-; GFX9-NEXT: s_bfe_u32 s11, 12, 0x100000
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s10, 1
-; GFX9-NEXT: s_lshr_b32 s11, 0, s11
-; GFX9-NEXT: s_lshl_b32 s1, s1, 27
-; GFX9-NEXT: s_bfe_u32 s12, 13, 0x100000
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s11, 1
-; GFX9-NEXT: s_lshr_b32 s12, 0, s12
-; GFX9-NEXT: s_lshl_b32 s1, s1, 28
-; GFX9-NEXT: s_bfe_u32 s13, 14, 0x100000
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s12, 1
-; GFX9-NEXT: s_lshr_b32 s13, 0, s13
-; GFX9-NEXT: s_lshl_b32 s1, s1, 29
-; GFX9-NEXT: s_bfe_u32 s14, 15, 0x100000
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s13, 1
-; GFX9-NEXT: s_lshr_b32 s14, 0, s14
-; GFX9-NEXT: s_lshl_b32 s1, s1, 30
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_and_b32 s1, s14, 1
-; GFX9-NEXT: s_lshl_b32 s1, s1, 31
-; GFX9-NEXT: s_or_b32 s0, s0, s1
-; GFX9-NEXT: s_mov_b32 s1, s0
-; GFX9-NEXT: s_mov_b32 s2, 0
-; GFX9-NEXT: ; return to shader part epilog
+; GCN-LABEL: s_sext_inreg_i65_33:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_mov_b32 s0, 0
+; GCN-NEXT: s_mov_b32 s1, 0
+; GCN-NEXT: s_mov_b32 s2, 0
+; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_i65_33:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s0, 1, 0x100000
-; GFX10PLUS-NEXT: s_bfe_u32 s1, 2, 0x100000
-; GFX10PLUS-NEXT: s_lshr_b32 s0, 0, s0
-; GFX10PLUS-NEXT: s_bfe_u32 s2, 3, 0x100000
-; GFX10PLUS-NEXT: s_lshr_b32 s1, 0, s1
-; GFX10PLUS-NEXT: s_lshr_b32 s2, 0, s2
-; GFX10PLUS-NEXT: s_bfe_u32 s3, 4, 0x100000
-; GFX10PLUS-NEXT: s_and_b32 s0, s0, 1
-; GFX10PLUS-NEXT: s_and_b32 s1, s1, 1
-; GFX10PLUS-NEXT: s_bfe_u32 s4, 5, 0x100000
-; GFX10PLUS-NEXT: s_lshr_b32 s3, 0, s3
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 17
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 18
-; GFX10PLUS-NEXT: s_and_b32 s2, s2, 1
-; GFX10PLUS-NEXT: s_lshr_b32 s4, 0, s4
-; GFX10PLUS-NEXT: s_bfe_u32 s5, 6, 0x100000
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 19
-; GFX10PLUS-NEXT: s_and_b32 s2, s3, 1
-; GFX10PLUS-NEXT: s_bfe_u32 s6, 7, 0x100000
-; GFX10PLUS-NEXT: s_lshr_b32 s5, 0, s5
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 20
-; GFX10PLUS-NEXT: s_and_b32 s2, s4, 1
-; GFX10PLUS-NEXT: s_lshr_b32 s6, 0, s6
-; GFX10PLUS-NEXT: s_bfe_u32 s7, 8, 0x100000
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 21
-; GFX10PLUS-NEXT: s_and_b32 s2, s5, 1
-; GFX10PLUS-NEXT: s_bfe_u32 s8, 9, 0x100000
-; GFX10PLUS-NEXT: s_lshr_b32 s7, 0, s7
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 22
-; GFX10PLUS-NEXT: s_and_b32 s2, s6, 1
-; GFX10PLUS-NEXT: s_lshr_b32 s8, 0, s8
-; GFX10PLUS-NEXT: s_bfe_u32 s9, 10, 0x100000
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 23
-; GFX10PLUS-NEXT: s_and_b32 s2, s7, 1
-; GFX10PLUS-NEXT: s_bfe_u32 s10, 11, 0x100000
-; GFX10PLUS-NEXT: s_lshr_b32 s9, 0, s9
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 24
-; GFX10PLUS-NEXT: s_and_b32 s2, s8, 1
-; GFX10PLUS-NEXT: s_lshr_b32 s10, 0, s10
-; GFX10PLUS-NEXT: s_bfe_u32 s11, 12, 0x100000
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 25
-; GFX10PLUS-NEXT: s_and_b32 s2, s9, 1
-; GFX10PLUS-NEXT: s_bfe_u32 s12, 13, 0x100000
-; GFX10PLUS-NEXT: s_lshr_b32 s11, 0, s11
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 26
-; GFX10PLUS-NEXT: s_and_b32 s2, s10, 1
-; GFX10PLUS-NEXT: s_lshr_b32 s12, 0, s12
-; GFX10PLUS-NEXT: s_bfe_u32 s13, 14, 0x100000
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 27
-; GFX10PLUS-NEXT: s_and_b32 s2, s11, 1
-; GFX10PLUS-NEXT: s_bfe_u32 s14, 15, 0x100000
-; GFX10PLUS-NEXT: s_lshr_b32 s13, 0, s13
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 28
-; GFX10PLUS-NEXT: s_and_b32 s2, s12, 1
-; GFX10PLUS-NEXT: s_lshr_b32 s14, 0, s14
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 29
-; GFX10PLUS-NEXT: s_and_b32 s2, s13, 1
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 30
-; GFX10PLUS-NEXT: s_and_b32 s2, s14, 1
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 31
+; GFX10PLUS-NEXT: s_mov_b32 s0, 0
+; GFX10PLUS-NEXT: s_mov_b32 s1, 0
; GFX10PLUS-NEXT: s_mov_b32 s2, 0
-; GFX10PLUS-NEXT: s_or_b32 s0, s0, s1
-; GFX10PLUS-NEXT: s_mov_b32 s1, s0
; GFX10PLUS-NEXT: ; return to shader part epilog
%shl = shl i65 %value, 33
%ashr = shl i65 %shl, 33
; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0x3fff, v0
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 2, v0
-; GFX10PLUS-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
%and = and i16 %x, 16383
%ext = zext i16 %and to i32
}
define amdgpu_ps i8 @s_shl_i8_7(i8 inreg %value) {
-; GFX6-LABEL: s_shl_i8_7:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_lshl_b32 s0, s0, 7
-; GFX6-NEXT: ; return to shader part epilog
-;
-; GFX8-LABEL: s_shl_i8_7:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s1, 7, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s1
-; GFX8-NEXT: ; return to shader part epilog
-;
-; GFX9-LABEL: s_shl_i8_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s1, 7, 0x100000
-; GFX9-NEXT: s_lshl_b32 s0, s0, s1
-; GFX9-NEXT: ; return to shader part epilog
+; GCN-LABEL: s_shl_i8_7:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_lshl_b32 s0, s0, 7
+; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i8_7:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s1, 7, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 7
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = shl i8 %value, 7
ret i8 %result
}
define amdgpu_ps i16 @s_shl_i16_15(i16 inreg %value) {
-; GFX6-LABEL: s_shl_i16_15:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_lshl_b32 s0, s0, 15
-; GFX6-NEXT: ; return to shader part epilog
-;
-; GFX8-LABEL: s_shl_i16_15:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s1, 15, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s1
-; GFX8-NEXT: ; return to shader part epilog
-;
-; GFX9-LABEL: s_shl_i16_15:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s1, 15, 0x100000
-; GFX9-NEXT: s_lshl_b32 s0, s0, s1
-; GFX9-NEXT: ; return to shader part epilog
+; GCN-LABEL: s_shl_i16_15:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_lshl_b32 s0, s0, 15
+; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i16_15:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s1, 15, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = shl i16 %value, 15
ret i16 %result
;
; GFX8-LABEL: s_ssubsat_i7:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s2, 9, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_sext_i32_i16 s3, s0
-; GFX8-NEXT: s_sext_i32_i16 s4, -1
-; GFX8-NEXT: s_max_i32 s5, s3, s4
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
-; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fff
-; GFX8-NEXT: s_min_i32 s3, s3, s4
-; GFX8-NEXT: s_sext_i32_i16 s4, s5
+; GFX8-NEXT: s_lshl_b32 s0, s0, 9
+; GFX8-NEXT: s_sext_i32_i16 s2, s0
+; GFX8-NEXT: s_sext_i32_i16 s3, -1
+; GFX8-NEXT: s_max_i32 s4, s2, s3
+; GFX8-NEXT: s_lshl_b32 s1, s1, 9
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT: s_min_i32 s2, s2, s3
+; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
-; GFX8-NEXT: s_max_i32 s1, s4, s1
+; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT: s_max_i32 s1, s3, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_min_i32 s1, s1, s3
+; GFX8-NEXT: s_sext_i32_i16 s2, s2
+; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s0, s0, s1
; GFX8-NEXT: s_sext_i32_i16 s0, s0
-; GFX8-NEXT: s_ashr_i32 s0, s0, s2
+; GFX8-NEXT: s_ashr_i32 s0, s0, 9
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_ssubsat_i7:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s2, 9, 0x100000
-; GFX9-NEXT: s_lshl_b32 s1, s1, s2
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
+; GFX9-NEXT: s_lshl_b32 s1, s1, 9
+; GFX9-NEXT: s_lshl_b32 s0, s0, 9
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_sub_i16 v0, s0, v0 clamp
; GFX9-NEXT: v_ashrrev_i16_e32 v0, 9, v0
;
; GFX10PLUS-LABEL: s_ssubsat_i7:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s2, 9, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s2
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 9
+; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 9
; GFX10PLUS-NEXT: v_sub_nc_i16 v0, s0, s1 clamp
; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 9, v0
; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v0
;
; GFX8-LABEL: s_ssubsat_i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
-; GFX8-NEXT: s_sext_i32_i16 s3, s0
-; GFX8-NEXT: s_sext_i32_i16 s4, -1
-; GFX8-NEXT: s_max_i32 s5, s3, s4
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
-; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fff
-; GFX8-NEXT: s_min_i32 s3, s3, s4
-; GFX8-NEXT: s_sext_i32_i16 s4, s5
+; GFX8-NEXT: s_lshl_b32 s0, s0, 8
+; GFX8-NEXT: s_sext_i32_i16 s2, s0
+; GFX8-NEXT: s_sext_i32_i16 s3, -1
+; GFX8-NEXT: s_max_i32 s4, s2, s3
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT: s_min_i32 s2, s2, s3
+; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
-; GFX8-NEXT: s_max_i32 s1, s4, s1
+; GFX8-NEXT: s_sub_i32 s2, s2, 0xffff8000
+; GFX8-NEXT: s_max_i32 s1, s3, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_min_i32 s1, s1, s3
+; GFX8-NEXT: s_sext_i32_i16 s2, s2
+; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s0, s0, s1
; GFX8-NEXT: s_sext_i32_i16 s0, s0
-; GFX8-NEXT: s_ashr_i32 s0, s0, s2
+; GFX8-NEXT: s_ashr_i32 s0, s0, 8
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_ssubsat_i8:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s2, 8, 0x100000
-; GFX9-NEXT: s_lshl_b32 s1, s1, s2
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
+; GFX9-NEXT: s_lshl_b32 s1, s1, 8
+; GFX9-NEXT: s_lshl_b32 s0, s0, 8
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_sub_i16 v0, s0, v0 clamp
; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v0
;
; GFX10PLUS-LABEL: s_ssubsat_i8:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s2, 8, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s2
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 8
+; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 8
; GFX10PLUS-NEXT: v_sub_nc_i16 v0, s0, s1 clamp
; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 8, v0
; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v3, v1, s4
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 clamp
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: s_movk_i32 s4, 0xff
-; GFX10-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
-; GFX10-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
+; GFX10-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 clamp
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_sub_i16 v0, v0, v1 clamp
;
; GFX8-LABEL: s_ssubsat_v2i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s4, 8, 0x100000
; GFX8-NEXT: s_lshr_b32 s2, s0, 8
-; GFX8-NEXT: s_lshl_b32 s0, s0, s4
-; GFX8-NEXT: s_sext_i32_i16 s5, s0
-; GFX8-NEXT: s_sext_i32_i16 s6, -1
-; GFX8-NEXT: s_max_i32 s7, s5, s6
+; GFX8-NEXT: s_lshl_b32 s0, s0, 8
+; GFX8-NEXT: s_sext_i32_i16 s4, s0
+; GFX8-NEXT: s_sext_i32_i16 s5, -1
+; GFX8-NEXT: s_max_i32 s6, s4, s5
; GFX8-NEXT: s_lshr_b32 s3, s1, 8
-; GFX8-NEXT: s_lshl_b32 s1, s1, s4
-; GFX8-NEXT: s_sub_i32 s7, s7, 0x7fff
-; GFX8-NEXT: s_min_i32 s5, s5, s6
-; GFX8-NEXT: s_sext_i32_i16 s7, s7
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
+; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
+; GFX8-NEXT: s_min_i32 s4, s4, s5
+; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
-; GFX8-NEXT: s_max_i32 s1, s7, s1
+; GFX8-NEXT: s_sub_i32 s4, s4, 0xffff8000
+; GFX8-NEXT: s_max_i32 s1, s6, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_min_i32 s1, s1, s5
+; GFX8-NEXT: s_sext_i32_i16 s4, s4
+; GFX8-NEXT: s_min_i32 s1, s1, s4
; GFX8-NEXT: s_sub_i32 s0, s0, s1
-; GFX8-NEXT: s_lshl_b32 s1, s2, s4
-; GFX8-NEXT: s_lshl_b32 s2, s3, s4
+; GFX8-NEXT: s_lshl_b32 s1, s2, 8
+; GFX8-NEXT: s_lshl_b32 s2, s3, 8
; GFX8-NEXT: s_sext_i32_i16 s3, s1
-; GFX8-NEXT: s_max_i32 s5, s3, s6
-; GFX8-NEXT: s_sub_i32 s5, s5, 0x7fff
-; GFX8-NEXT: s_min_i32 s3, s3, s6
-; GFX8-NEXT: s_sext_i32_i16 s5, s5
+; GFX8-NEXT: s_max_i32 s4, s3, s5
+; GFX8-NEXT: s_sub_i32 s4, s4, 0x7fff
+; GFX8-NEXT: s_min_i32 s3, s3, s5
+; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sub_i32 s3, s3, 0xffff8000
-; GFX8-NEXT: s_max_i32 s2, s5, s2
+; GFX8-NEXT: s_max_i32 s2, s4, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_sub_i32 s1, s1, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s0, s0
-; GFX8-NEXT: s_ashr_i32 s1, s1, s4
-; GFX8-NEXT: s_ashr_i32 s0, s0, s4
+; GFX8-NEXT: s_ashr_i32 s1, s1, 8
+; GFX8-NEXT: s_ashr_i32 s0, s0, 8
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
-; GFX8-NEXT: s_lshl_b32 s1, s1, s4
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_ssubsat_v4i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; GFX9-NEXT: v_perm_b32 v2, v2, v0, s4
+; GFX9-NEXT: v_and_b32_e32 v6, 0xffff, v0
; GFX9-NEXT: v_alignbit_b32 v0, v3, v0, 16
-; GFX9-NEXT: v_perm_b32 v3, v4, v1, s4
+; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v1
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1
+; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v6
+; GFX9-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GFX9-NEXT: v_alignbit_b32 v1, v5, v1, 16
; GFX9-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
-; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1
-; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; GFX10-NEXT: v_perm_b32 v2, v2, v0, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
+; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v1
+; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX10-NEXT: v_alignbit_b32 v0, v3, v0, 16
-; GFX10-NEXT: v_perm_b32 v3, v4, v1, 0x5040100
-; GFX10-NEXT: v_alignbit_b32 v1, v5, v1, 16
+; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 24
-; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
+; GFX10-NEXT: v_lshl_or_b32 v3, v5, 16, v6
+; GFX10-NEXT: v_alignbit_b32 v1, v7, v1, 16
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
+; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_sub_i16 v2, v2, v3 clamp
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v4, 24, v0
-; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; GFX11-NEXT: v_perm_b32 v2, v2, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v3, v3, v1, 0x5040100
-; GFX11-NEXT: v_alignbit_b32 v0, v4, v0, 16
-; GFX11-NEXT: v_alignbit_b32 v1, v5, v1, 16
+; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v1
+; GFX11-NEXT: v_lshrrev_b32_e32 v6, 24, v0
+; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
+; GFX11-NEXT: v_lshl_or_b32 v2, v2, 16, v4
+; GFX11-NEXT: v_lshl_or_b32 v3, v3, 16, v5
+; GFX11-NEXT: v_alignbit_b32 v0, v6, v0, 16
+; GFX11-NEXT: v_alignbit_b32 v1, v7, v1, 16
; GFX11-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
;
; GFX8-LABEL: s_ssubsat_v4i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s8, 8, 0x100000
; GFX8-NEXT: s_lshr_b32 s2, s0, 8
; GFX8-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s0, 24
-; GFX8-NEXT: s_lshl_b32 s0, s0, s8
-; GFX8-NEXT: s_sext_i32_i16 s9, s0
-; GFX8-NEXT: s_sext_i32_i16 s10, -1
-; GFX8-NEXT: s_max_i32 s11, s9, s10
+; GFX8-NEXT: s_lshl_b32 s0, s0, 8
+; GFX8-NEXT: s_sext_i32_i16 s8, s0
+; GFX8-NEXT: s_sext_i32_i16 s9, -1
+; GFX8-NEXT: s_max_i32 s10, s8, s9
; GFX8-NEXT: s_lshr_b32 s5, s1, 8
; GFX8-NEXT: s_lshr_b32 s6, s1, 16
; GFX8-NEXT: s_lshr_b32 s7, s1, 24
-; GFX8-NEXT: s_lshl_b32 s1, s1, s8
-; GFX8-NEXT: s_sub_i32 s11, s11, 0x7fff
-; GFX8-NEXT: s_min_i32 s9, s9, s10
-; GFX8-NEXT: s_sext_i32_i16 s11, s11
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
+; GFX8-NEXT: s_sub_i32 s10, s10, 0x7fff
+; GFX8-NEXT: s_min_i32 s8, s8, s9
+; GFX8-NEXT: s_sext_i32_i16 s10, s10
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s9, s9, 0xffff8000
-; GFX8-NEXT: s_max_i32 s1, s11, s1
+; GFX8-NEXT: s_sub_i32 s8, s8, 0xffff8000
+; GFX8-NEXT: s_max_i32 s1, s10, s1
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sext_i32_i16 s9, s9
-; GFX8-NEXT: s_min_i32 s1, s1, s9
+; GFX8-NEXT: s_sext_i32_i16 s8, s8
+; GFX8-NEXT: s_min_i32 s1, s1, s8
; GFX8-NEXT: s_sub_i32 s0, s0, s1
-; GFX8-NEXT: s_lshl_b32 s1, s2, s8
-; GFX8-NEXT: s_lshl_b32 s2, s5, s8
+; GFX8-NEXT: s_lshl_b32 s1, s2, 8
+; GFX8-NEXT: s_lshl_b32 s2, s5, 8
; GFX8-NEXT: s_sext_i32_i16 s5, s1
-; GFX8-NEXT: s_max_i32 s9, s5, s10
-; GFX8-NEXT: s_sub_i32 s9, s9, 0x7fff
-; GFX8-NEXT: s_min_i32 s5, s5, s10
-; GFX8-NEXT: s_sext_i32_i16 s9, s9
+; GFX8-NEXT: s_max_i32 s8, s5, s9
+; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fff
+; GFX8-NEXT: s_min_i32 s5, s5, s9
+; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
-; GFX8-NEXT: s_max_i32 s2, s9, s2
+; GFX8-NEXT: s_max_i32 s2, s8, s2
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_min_i32 s2, s2, s5
; GFX8-NEXT: s_sub_i32 s1, s1, s2
-; GFX8-NEXT: s_lshl_b32 s2, s3, s8
+; GFX8-NEXT: s_lshl_b32 s2, s3, 8
; GFX8-NEXT: s_sext_i32_i16 s5, s2
-; GFX8-NEXT: s_lshl_b32 s3, s6, s8
-; GFX8-NEXT: s_max_i32 s6, s5, s10
+; GFX8-NEXT: s_lshl_b32 s3, s6, 8
+; GFX8-NEXT: s_max_i32 s6, s5, s9
; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
-; GFX8-NEXT: s_min_i32 s5, s5, s10
+; GFX8-NEXT: s_min_i32 s5, s5, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_min_i32 s3, s3, s5
; GFX8-NEXT: s_sub_i32 s2, s2, s3
-; GFX8-NEXT: s_lshl_b32 s3, s4, s8
+; GFX8-NEXT: s_lshl_b32 s3, s4, 8
; GFX8-NEXT: s_sext_i32_i16 s5, s3
-; GFX8-NEXT: s_max_i32 s6, s5, s10
-; GFX8-NEXT: s_lshl_b32 s4, s7, s8
+; GFX8-NEXT: s_max_i32 s6, s5, s9
+; GFX8-NEXT: s_lshl_b32 s4, s7, 8
; GFX8-NEXT: s_sub_i32 s6, s6, 0x7fff
-; GFX8-NEXT: s_min_i32 s5, s5, s10
+; GFX8-NEXT: s_min_i32 s5, s5, s9
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
; GFX8-NEXT: s_max_i32 s4, s6, s4
; GFX8-NEXT: s_sext_i32_i16 s0, s0
-; GFX8-NEXT: s_ashr_i32 s1, s1, s8
+; GFX8-NEXT: s_ashr_i32 s1, s1, 8
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_ashr_i32 s0, s0, s8
+; GFX8-NEXT: s_ashr_i32 s0, s0, 8
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_min_i32 s4, s4, s5
; GFX8-NEXT: s_and_b32 s1, s1, 0xff
-; GFX8-NEXT: s_ashr_i32 s2, s2, s8
+; GFX8-NEXT: s_ashr_i32 s2, s2, 8
; GFX8-NEXT: s_sub_i32 s3, s3, s4
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: s_and_b32 s1, s2, 0xff
-; GFX8-NEXT: s_ashr_i32 s3, s3, s8
+; GFX8-NEXT: s_ashr_i32 s3, s3, 8
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: s_and_b32 s1, s3, 0xff
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_min_i32 s1, s3, s1
; GFX8-NEXT: s_sub_i32 s1, s2, s1
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_or_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_min_i32 s3, s4, s3
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_sub_i32 s3, s5, s3
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s0, s0, s2
-; GFX8-NEXT: s_bfe_u32 s2, s3, 0x100000
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s1, s1, s2
; GFX8-NEXT: ; return to shader part epilog
; GFX8-NEXT: s_sext_i32_i16 s7, s11
; GFX8-NEXT: s_sub_i32 s5, s5, 0xffff8000
; GFX8-NEXT: s_max_i32 s6, s6, s7
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
; GFX8-NEXT: s_min_i32 s5, s6, s5
; GFX8-NEXT: s_or_b32 s0, s0, s3
-; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s4
; GFX8-NEXT: s_sub_i32 s5, s8, s5
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
; GFX8-NEXT: s_or_b32 s1, s1, s3
-; GFX8-NEXT: s_bfe_u32 s3, s5, 0x100000
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s5
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
; GFX8-NEXT: s_or_b32 s2, s2, s3
; GFX8-NEXT: ; return to shader part epilog
; GFX8-NEXT: s_sext_i32_i16 s7, s11
; GFX8-NEXT: s_max_i32 s8, s7, s17
; GFX8-NEXT: s_sub_i32 s8, s8, 0x7fff
-; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
; GFX8-NEXT: s_min_i32 s7, s7, s17
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s9, s15
-; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
+; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_sub_i32 s7, s7, 0xffff8000
; GFX8-NEXT: s_max_i32 s8, s8, s9
; GFX8-NEXT: s_or_b32 s0, s0, s4
-; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
; GFX8-NEXT: s_sub_i32 s6, s10, s6
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s7, s7
-; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
+; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_min_i32 s7, s8, s7
; GFX8-NEXT: s_or_b32 s1, s1, s4
-; GFX8-NEXT: s_bfe_u32 s4, s6, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
; GFX8-NEXT: s_sub_i32 s7, s11, s7
-; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_or_b32 s2, s2, s4
-; GFX8-NEXT: s_bfe_u32 s4, s7, 0x100000
-; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
+; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
; GFX8-NEXT: s_or_b32 s3, s3, s4
; GFX8-NEXT: ; return to shader part epilog
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0
-; GFX9-NEXT: s_bfe_u32 s0, 8, 0x100000
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_bfe_u32 s3, s4, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s4
; GFX9-NEXT: v_mov_b32_e32 v0, s4
-; GFX9-NEXT: s_lshr_b32 s3, s3, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s2
-; GFX9-NEXT: s_lshr_b32 s1, s4, 16
+; GFX9-NEXT: s_lshr_b32 s1, s1, 8
+; GFX9-NEXT: s_lshr_b32 s0, s4, 16
; GFX9-NEXT: ds_write_b8 v1, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, s3
-; GFX9-NEXT: ds_write_b8 v1, v0 offset:1
-; GFX9-NEXT: s_lshr_b32 s2, s1, s0
; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:1
+; GFX9-NEXT: s_lshr_b32 s1, s0, 8
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: ds_write_b8 v1, v0 offset:2
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: s_bfe_u32 s2, s5, 0x100000
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s5
; GFX9-NEXT: ds_write_b8 v1, v0 offset:3
-; GFX9-NEXT: s_lshr_b32 s2, s2, s0
+; GFX9-NEXT: s_lshr_b32 s1, s1, 8
; GFX9-NEXT: v_mov_b32_e32 v0, s5
-; GFX9-NEXT: s_lshr_b32 s1, s5, 16
+; GFX9-NEXT: s_lshr_b32 s0, s5, 16
; GFX9-NEXT: ds_write_b8 v1, v0 offset:4
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: ds_write_b8 v1, v0 offset:5
-; GFX9-NEXT: s_lshr_b32 s2, s1, s0
; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:5
+; GFX9-NEXT: s_lshr_b32 s1, s0, 8
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: ds_write_b8 v1, v0 offset:6
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: s_bfe_u32 s2, s6, 0x100000
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s6
; GFX9-NEXT: ds_write_b8 v1, v0 offset:7
-; GFX9-NEXT: s_lshr_b32 s2, s2, s0
+; GFX9-NEXT: s_lshr_b32 s1, s1, 8
; GFX9-NEXT: v_mov_b32_e32 v0, s6
-; GFX9-NEXT: s_lshr_b32 s1, s6, 16
+; GFX9-NEXT: s_lshr_b32 s0, s6, 16
; GFX9-NEXT: ds_write_b8 v1, v0 offset:8
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: ds_write_b8 v1, v0 offset:9
-; GFX9-NEXT: s_lshr_b32 s2, s1, s0
; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:9
+; GFX9-NEXT: s_lshr_b32 s1, s0, 8
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: ds_write_b8 v1, v0 offset:10
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: s_bfe_u32 s2, s7, 0x100000
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s7
; GFX9-NEXT: ds_write_b8 v1, v0 offset:11
-; GFX9-NEXT: s_lshr_b32 s2, s2, s0
+; GFX9-NEXT: s_lshr_b32 s1, s1, 8
; GFX9-NEXT: v_mov_b32_e32 v0, s7
-; GFX9-NEXT: s_lshr_b32 s1, s7, 16
+; GFX9-NEXT: s_lshr_b32 s0, s7, 16
; GFX9-NEXT: ds_write_b8 v1, v0 offset:12
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: ds_write_b8 v1, v0 offset:13
-; GFX9-NEXT: s_lshr_b32 s0, s1, s0
; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: ds_write_b8 v1, v0 offset:14
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:13
+; GFX9-NEXT: s_lshr_b32 s1, s0, 8
; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:14
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: ds_write_b8 v1, v0 offset:15
; GFX9-NEXT: s_endpgm
;
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0
-; GFX10-NEXT: s_bfe_u32 s0, 8, 0x100000
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_bfe_u32 s3, s4, 0x100000
-; GFX10-NEXT: s_lshr_b32 s1, s4, 16
+; GFX10-NEXT: s_lshr_b32 s0, s4, 16
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s4
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: s_lshr_b32 s2, s5, 16
-; GFX10-NEXT: s_bfe_u32 s4, s5, 0x100000
+; GFX10-NEXT: s_and_b32 s3, 0xffff, s5
; GFX10-NEXT: v_mov_b32_e32 v2, s5
-; GFX10-NEXT: s_lshr_b32 s3, s3, s0
-; GFX10-NEXT: s_lshr_b32 s5, s6, 16
-; GFX10-NEXT: s_bfe_u32 s8, s6, 0x100000
+; GFX10-NEXT: s_lshr_b32 s1, s1, 8
+; GFX10-NEXT: v_mov_b32_e32 v4, s0
+; GFX10-NEXT: s_lshr_b32 s4, s6, 16
+; GFX10-NEXT: s_and_b32 s5, 0xffff, s6
; GFX10-NEXT: v_mov_b32_e32 v3, s6
-; GFX10-NEXT: s_lshr_b32 s6, s1, s0
-; GFX10-NEXT: v_mov_b32_e32 v4, s1
-; GFX10-NEXT: s_lshr_b32 s1, s4, s0
-; GFX10-NEXT: s_lshr_b32 s4, s2, s0
-; GFX10-NEXT: v_mov_b32_e32 v6, s3
-; GFX10-NEXT: v_mov_b32_e32 v7, s6
+; GFX10-NEXT: s_lshr_b32 s6, s0, 8
+; GFX10-NEXT: s_lshr_b32 s0, s3, 8
+; GFX10-NEXT: s_lshr_b32 s3, s2, 8
; GFX10-NEXT: v_mov_b32_e32 v5, s2
-; GFX10-NEXT: s_lshr_b32 s2, s8, s0
-; GFX10-NEXT: v_mov_b32_e32 v8, s1
-; GFX10-NEXT: v_mov_b32_e32 v9, s4
+; GFX10-NEXT: v_mov_b32_e32 v6, s1
+; GFX10-NEXT: s_lshr_b32 s2, s5, 8
+; GFX10-NEXT: v_mov_b32_e32 v7, s6
+; GFX10-NEXT: v_mov_b32_e32 v8, s0
+; GFX10-NEXT: v_mov_b32_e32 v9, s3
; GFX10-NEXT: ds_write_b8 v1, v0
; GFX10-NEXT: ds_write_b8 v1, v2 offset:4
; GFX10-NEXT: ds_write_b8 v1, v4 offset:2
+; GFX10-NEXT: ds_write_b8 v1, v5 offset:6
; GFX10-NEXT: ds_write_b8 v1, v6 offset:1
; GFX10-NEXT: ds_write_b8 v1, v7 offset:3
; GFX10-NEXT: ds_write_b8 v1, v8 offset:5
-; GFX10-NEXT: ds_write_b8 v1, v5 offset:6
-; GFX10-NEXT: v_mov_b32_e32 v0, s5
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: v_mov_b32_e32 v10, s2
-; GFX10-NEXT: s_lshr_b32 s1, s5, s0
+; GFX10-NEXT: s_lshr_b32 s0, s4, 8
; GFX10-NEXT: ds_write_b8 v1, v9 offset:7
; GFX10-NEXT: ds_write_b8 v1, v3 offset:8
; GFX10-NEXT: ds_write_b8 v1, v10 offset:9
; GFX10-NEXT: ds_write_b8 v1, v0 offset:10
-; GFX10-NEXT: v_mov_b32_e32 v0, s1
-; GFX10-NEXT: s_bfe_u32 s1, s7, 0x100000
-; GFX10-NEXT: s_lshr_b32 s2, s7, 16
-; GFX10-NEXT: s_lshr_b32 s1, s1, s0
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: s_and_b32 s0, 0xffff, s7
+; GFX10-NEXT: s_lshr_b32 s1, s7, 16
+; GFX10-NEXT: s_lshr_b32 s0, s0, 8
; GFX10-NEXT: v_mov_b32_e32 v2, s7
-; GFX10-NEXT: v_mov_b32_e32 v3, s1
-; GFX10-NEXT: s_lshr_b32 s0, s2, s0
-; GFX10-NEXT: v_mov_b32_e32 v4, s2
+; GFX10-NEXT: v_mov_b32_e32 v3, s0
+; GFX10-NEXT: s_lshr_b32 s0, s1, 8
+; GFX10-NEXT: v_mov_b32_e32 v4, s1
; GFX10-NEXT: v_mov_b32_e32 v5, s0
; GFX10-NEXT: ds_write_b8 v1, v0 offset:11
; GFX10-NEXT: ds_write_b8 v1, v2 offset:12
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x10
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
-; GFX11-NEXT: s_bfe_u32 s1, 8, 0x100000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_bfe_u32 s3, s4, 0x100000
-; GFX11-NEXT: s_lshr_b32 s2, s4, 16
+; GFX11-NEXT: s_and_b32 s2, 0xffff, s4
+; GFX11-NEXT: s_lshr_b32 s1, s4, 16
+; GFX11-NEXT: s_lshr_b32 s2, s2, 8
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: s_lshr_b32 s0, s5, 16
-; GFX11-NEXT: s_lshr_b32 s3, s3, s1
-; GFX11-NEXT: s_bfe_u32 s4, s5, 0x100000
-; GFX11-NEXT: s_bfe_u32 s8, s6, 0x100000
-; GFX11-NEXT: s_lshr_b32 s9, s2, s1
-; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s3
-; GFX11-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, s2
-; GFX11-NEXT: s_lshr_b32 s5, s6, 16
-; GFX11-NEXT: s_lshr_b32 s2, s4, s1
-; GFX11-NEXT: s_lshr_b32 s4, s0, s1
-; GFX11-NEXT: s_lshr_b32 s0, s8, s1
-; GFX11-NEXT: v_dual_mov_b32 v6, s9 :: v_dual_mov_b32 v7, s2
-; GFX11-NEXT: v_mov_b32_e32 v8, s4
+; GFX11-NEXT: s_and_b32 s3, 0xffff, s5
+; GFX11-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, s6
+; GFX11-NEXT: s_lshr_b32 s4, s6, 16
+; GFX11-NEXT: s_and_b32 s5, 0xffff, s6
+; GFX11-NEXT: s_lshr_b32 s6, s1, 8
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s6
+; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v5, s0
+; GFX11-NEXT: s_lshr_b32 s1, s3, 8
+; GFX11-NEXT: s_lshr_b32 s3, s0, 8
+; GFX11-NEXT: s_lshr_b32 s0, s5, 8
+; GFX11-NEXT: v_dual_mov_b32 v8, s1 :: v_dual_mov_b32 v9, s3
; GFX11-NEXT: ds_store_b8 v1, v0
-; GFX11-NEXT: ds_store_b8 v1, v5 offset:1
-; GFX11-NEXT: ds_store_b8 v1, v3 offset:2
-; GFX11-NEXT: ds_store_b8 v1, v6 offset:3
+; GFX11-NEXT: ds_store_b8 v1, v6 offset:1
+; GFX11-NEXT: ds_store_b8 v1, v4 offset:2
+; GFX11-NEXT: ds_store_b8 v1, v7 offset:3
; GFX11-NEXT: ds_store_b8 v1, v2 offset:4
-; GFX11-NEXT: ds_store_b8 v1, v7 offset:5
-; GFX11-NEXT: ds_store_b8 v1, v4 offset:6
-; GFX11-NEXT: ds_store_b8 v1, v8 offset:7
-; GFX11-NEXT: v_mov_b32_e32 v3, s5
-; GFX11-NEXT: s_lshr_b32 s2, s7, 16
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v7, s2
-; GFX11-NEXT: s_lshr_b32 s0, s5, s1
-; GFX11-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v5, s7
+; GFX11-NEXT: ds_store_b8 v1, v8 offset:5
+; GFX11-NEXT: ds_store_b8 v1, v5 offset:6
+; GFX11-NEXT: ds_store_b8 v1, v9 offset:7
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v5, s7
+; GFX11-NEXT: s_lshr_b32 s0, s4, 8
+; GFX11-NEXT: s_lshr_b32 s1, s7, 16
; GFX11-NEXT: v_mov_b32_e32 v4, s0
-; GFX11-NEXT: s_bfe_u32 s0, s7, 0x100000
-; GFX11-NEXT: s_lshr_b32 s0, s0, s1
+; GFX11-NEXT: s_and_b32 s0, 0xffff, s7
+; GFX11-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v7, s1
+; GFX11-NEXT: s_lshr_b32 s0, s0, 8
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v6, s0
-; GFX11-NEXT: s_lshr_b32 s0, s2, s1
+; GFX11-NEXT: s_lshr_b32 s0, s1, 8
; GFX11-NEXT: v_mov_b32_e32 v8, s0
-; GFX11-NEXT: ds_store_b8 v1, v0 offset:8
-; GFX11-NEXT: ds_store_b8 v1, v2 offset:9
-; GFX11-NEXT: ds_store_b8 v1, v3 offset:10
+; GFX11-NEXT: ds_store_b8 v1, v3 offset:8
+; GFX11-NEXT: ds_store_b8 v1, v0 offset:9
+; GFX11-NEXT: ds_store_b8 v1, v2 offset:10
; GFX11-NEXT: ds_store_b8 v1, v4 offset:11
; GFX11-NEXT: ds_store_b8 v1, v5 offset:12
; GFX11-NEXT: ds_store_b8 v1, v6 offset:13
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0
-; GFX9-NEXT: s_bfe_u32 s0, 8, 0x100000
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_bfe_u32 s3, s4, 0x100000
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s4
; GFX9-NEXT: v_mov_b32_e32 v0, s4
-; GFX9-NEXT: s_lshr_b32 s3, s3, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s2
-; GFX9-NEXT: s_lshr_b32 s1, s4, 16
+; GFX9-NEXT: s_lshr_b32 s1, s1, 8
+; GFX9-NEXT: s_lshr_b32 s0, s4, 16
; GFX9-NEXT: ds_write_b8 v1, v0
-; GFX9-NEXT: v_mov_b32_e32 v0, s3
-; GFX9-NEXT: ds_write_b8 v1, v0 offset:1
-; GFX9-NEXT: s_lshr_b32 s2, s1, s0
; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:1
+; GFX9-NEXT: s_lshr_b32 s1, s0, 8
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: ds_write_b8 v1, v0 offset:2
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: s_bfe_u32 s2, s5, 0x100000
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s5
; GFX9-NEXT: ds_write_b8 v1, v0 offset:3
-; GFX9-NEXT: s_lshr_b32 s2, s2, s0
+; GFX9-NEXT: s_lshr_b32 s1, s1, 8
; GFX9-NEXT: v_mov_b32_e32 v0, s5
-; GFX9-NEXT: s_lshr_b32 s1, s5, 16
+; GFX9-NEXT: s_lshr_b32 s0, s5, 16
; GFX9-NEXT: ds_write_b8 v1, v0 offset:4
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: ds_write_b8 v1, v0 offset:5
-; GFX9-NEXT: s_lshr_b32 s2, s1, s0
; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:5
+; GFX9-NEXT: s_lshr_b32 s1, s0, 8
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: ds_write_b8 v1, v0 offset:6
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: s_bfe_u32 s2, s6, 0x100000
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
+; GFX9-NEXT: s_and_b32 s1, 0xffff, s6
; GFX9-NEXT: ds_write_b8 v1, v0 offset:7
-; GFX9-NEXT: s_lshr_b32 s2, s2, s0
+; GFX9-NEXT: s_lshr_b32 s1, s1, 8
; GFX9-NEXT: v_mov_b32_e32 v0, s6
-; GFX9-NEXT: s_lshr_b32 s1, s6, 16
+; GFX9-NEXT: s_lshr_b32 s0, s6, 16
; GFX9-NEXT: ds_write_b8 v1, v0 offset:8
-; GFX9-NEXT: v_mov_b32_e32 v0, s2
-; GFX9-NEXT: ds_write_b8 v1, v0 offset:9
-; GFX9-NEXT: s_lshr_b32 s0, s1, s0
; GFX9-NEXT: v_mov_b32_e32 v0, s1
-; GFX9-NEXT: ds_write_b8 v1, v0 offset:10
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:9
+; GFX9-NEXT: s_lshr_b32 s1, s0, 8
; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: ds_write_b8 v1, v0 offset:10
+; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: ds_write_b8 v1, v0 offset:11
; GFX9-NEXT: s_endpgm
;
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10
; GFX10-NEXT: s_load_dword s2, s[0:1], 0x0
-; GFX10-NEXT: s_bfe_u32 s0, 8, 0x100000
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_lshr_b32 s1, s4, 16
-; GFX10-NEXT: s_bfe_u32 s3, s4, 0x100000
-; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_lshr_b32 s0, s4, 16
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: s_lshr_b32 s2, s5, 16
-; GFX10-NEXT: s_bfe_u32 s4, s5, 0x100000
+; GFX10-NEXT: s_and_b32 s3, 0xffff, s5
+; GFX10-NEXT: s_and_b32 s1, 0xffff, s4
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: v_mov_b32_e32 v2, s5
-; GFX10-NEXT: s_lshr_b32 s5, s6, 16
-; GFX10-NEXT: s_bfe_u32 s7, s6, 0x100000
+; GFX10-NEXT: s_lshr_b32 s4, s6, 16
+; GFX10-NEXT: s_and_b32 s5, 0xffff, s6
; GFX10-NEXT: v_mov_b32_e32 v3, s6
-; GFX10-NEXT: s_lshr_b32 s6, s1, s0
-; GFX10-NEXT: v_mov_b32_e32 v4, s1
-; GFX10-NEXT: s_lshr_b32 s1, s4, s0
-; GFX10-NEXT: s_lshr_b32 s4, s2, s0
-; GFX10-NEXT: s_lshr_b32 s3, s3, s0
+; GFX10-NEXT: s_lshr_b32 s6, s0, 8
+; GFX10-NEXT: v_mov_b32_e32 v4, s0
+; GFX10-NEXT: s_lshr_b32 s0, s3, 8
+; GFX10-NEXT: s_lshr_b32 s3, s2, 8
+; GFX10-NEXT: s_lshr_b32 s1, s1, 8
; GFX10-NEXT: v_mov_b32_e32 v5, s2
-; GFX10-NEXT: s_lshr_b32 s2, s7, s0
-; GFX10-NEXT: v_mov_b32_e32 v9, s4
-; GFX10-NEXT: v_mov_b32_e32 v6, s3
+; GFX10-NEXT: s_lshr_b32 s2, s5, 8
+; GFX10-NEXT: v_mov_b32_e32 v9, s3
+; GFX10-NEXT: v_mov_b32_e32 v6, s1
+; GFX10-NEXT: v_mov_b32_e32 v8, s0
+; GFX10-NEXT: v_mov_b32_e32 v10, s2
+; GFX10-NEXT: s_lshr_b32 s0, s4, 8
; GFX10-NEXT: v_mov_b32_e32 v7, s6
-; GFX10-NEXT: v_mov_b32_e32 v8, s1
; GFX10-NEXT: ds_write_b8 v1, v0
; GFX10-NEXT: ds_write_b8 v1, v2 offset:4
; GFX10-NEXT: ds_write_b8 v1, v4 offset:2
; GFX10-NEXT: ds_write_b8 v1, v6 offset:1
; GFX10-NEXT: ds_write_b8 v1, v7 offset:3
; GFX10-NEXT: ds_write_b8 v1, v8 offset:5
-; GFX10-NEXT: v_mov_b32_e32 v0, s2
-; GFX10-NEXT: s_lshr_b32 s0, s5, s0
-; GFX10-NEXT: v_mov_b32_e32 v2, s5
-; GFX10-NEXT: v_mov_b32_e32 v4, s0
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: ds_write_b8 v1, v9 offset:7
; GFX10-NEXT: ds_write_b8 v1, v3 offset:8
-; GFX10-NEXT: ds_write_b8 v1, v0 offset:9
-; GFX10-NEXT: ds_write_b8 v1, v2 offset:10
-; GFX10-NEXT: ds_write_b8 v1, v4 offset:11
+; GFX10-NEXT: ds_write_b8 v1, v10 offset:9
+; GFX10-NEXT: ds_write_b8 v1, v0 offset:10
+; GFX10-NEXT: ds_write_b8 v1, v2 offset:11
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: store_lds_v3i32_align1:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x10
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0
-; GFX11-NEXT: s_bfe_u32 s1, 8, 0x100000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_bfe_u32 s3, s4, 0x100000
-; GFX11-NEXT: s_lshr_b32 s2, s4, 16
+; GFX11-NEXT: s_and_b32 s2, 0xffff, s4
+; GFX11-NEXT: s_lshr_b32 s1, s4, 16
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s0
+; GFX11-NEXT: s_lshr_b32 s4, s6, 16
+; GFX11-NEXT: s_lshr_b32 s2, s2, 8
; GFX11-NEXT: s_lshr_b32 s0, s5, 16
-; GFX11-NEXT: s_bfe_u32 s4, s5, 0x100000
+; GFX11-NEXT: s_and_b32 s3, 0xffff, s5
; GFX11-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, s6
-; GFX11-NEXT: s_lshr_b32 s5, s6, 16
-; GFX11-NEXT: s_lshr_b32 s3, s3, s1
-; GFX11-NEXT: s_bfe_u32 s7, s6, 0x100000
-; GFX11-NEXT: s_lshr_b32 s6, s2, s1
-; GFX11-NEXT: v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v7, s3
-; GFX11-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s0
-; GFX11-NEXT: s_lshr_b32 s2, s4, s1
-; GFX11-NEXT: s_lshr_b32 s4, s0, s1
-; GFX11-NEXT: s_lshr_b32 s0, s7, s1
-; GFX11-NEXT: s_lshr_b32 s1, s5, s1
-; GFX11-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s2
-; GFX11-NEXT: v_dual_mov_b32 v10, s4 :: v_dual_mov_b32 v11, s0
-; GFX11-NEXT: v_mov_b32_e32 v12, s1
+; GFX11-NEXT: s_and_b32 s5, 0xffff, s6
+; GFX11-NEXT: s_lshr_b32 s6, s1, 8
+; GFX11-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s2
+; GFX11-NEXT: v_dual_mov_b32 v4, s1 :: v_dual_mov_b32 v5, s0
+; GFX11-NEXT: s_lshr_b32 s1, s3, 8
+; GFX11-NEXT: s_lshr_b32 s3, s0, 8
+; GFX11-NEXT: s_lshr_b32 s0, s5, 8
+; GFX11-NEXT: s_lshr_b32 s5, s4, 8
+; GFX11-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s1
+; GFX11-NEXT: v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v11, s0
+; GFX11-NEXT: v_mov_b32_e32 v12, s5
; GFX11-NEXT: ds_store_b8 v1, v0
; GFX11-NEXT: ds_store_b8 v1, v7 offset:1
; GFX11-NEXT: ds_store_b8 v1, v4 offset:2
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
%val = call <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <2 x half> %val
; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT: v_fma_f16 v2, v6, v7, v8
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
%val = call <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <3 x half> %val
; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v5
; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT: v_fma_f16 v2, v6, v8, v10
-; GFX8-NEXT: v_mov_b32_e32 v4, 16
; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
; GFX8-NEXT: v_fma_f16 v3, v7, v9, v11
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v3
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
%val = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <4 x half> %val
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
%neg.x = fneg <2 x half> %x
%neg.y = fneg <2 x half> %y
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 25, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 25, v1
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v2, v0
; GFX6-NEXT: v_min_u32_e32 v1, v2, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 25, v0
;
; GFX8-LABEL: s_uaddsat_i7:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s2, 9, 0x100000
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
+; GFX8-NEXT: s_lshl_b32 s1, s1, 9
+; GFX8-NEXT: s_lshl_b32 s0, s0, 9
; GFX8-NEXT: v_mov_b32_e32 v0, s1
; GFX8-NEXT: v_add_u16_e64 v0, s0, v0 clamp
; GFX8-NEXT: v_lshrrev_b16_e32 v0, 9, v0
;
; GFX9-LABEL: s_uaddsat_i7:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s2, 9, 0x100000
-; GFX9-NEXT: s_lshl_b32 s1, s1, s2
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
+; GFX9-NEXT: s_lshl_b32 s1, s1, 9
+; GFX9-NEXT: s_lshl_b32 s0, s0, 9
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_add_u16_e64 v0, s0, v0 clamp
; GFX9-NEXT: v_lshrrev_b16_e32 v0, 9, v0
;
; GFX10PLUS-LABEL: s_uaddsat_i7:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s2, 9, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s2
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 9
+; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 9
; GFX10PLUS-NEXT: v_add_nc_u16 v0, s0, s1 clamp
; GFX10PLUS-NEXT: v_lshrrev_b16 v0, 9, v0
; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v2, v0
; GFX6-NEXT: v_min_u32_e32 v1, v2, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v0
;
; GFX8-LABEL: s_uaddsat_i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
+; GFX8-NEXT: s_lshl_b32 s0, s0, 8
; GFX8-NEXT: v_mov_b32_e32 v0, s1
; GFX8-NEXT: v_add_u16_e64 v0, s0, v0 clamp
; GFX8-NEXT: v_lshrrev_b16_e32 v0, 8, v0
;
; GFX9-LABEL: s_uaddsat_i8:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s2, 8, 0x100000
-; GFX9-NEXT: s_lshl_b32 s1, s1, s2
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
+; GFX9-NEXT: s_lshl_b32 s1, s1, 8
+; GFX9-NEXT: s_lshl_b32 s0, s0, 8
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_add_u16_e64 v0, s0, v0 clamp
; GFX9-NEXT: v_lshrrev_b16_e32 v0, 8, v0
;
; GFX10PLUS-LABEL: s_uaddsat_i8:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s2, 8, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s2
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 8
+; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 8
; GFX10PLUS-NEXT: v_add_nc_u16 v0, s0, s1 clamp
; GFX10PLUS-NEXT: v_lshrrev_b16 v0, 8, v0
; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v4, v0
; GFX6-NEXT: v_min_u32_e32 v1, v4, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
-; GFX6-NEXT: v_xor_b32_e32 v3, -1, v1
+; GFX6-NEXT: v_not_b32_e32 v3, v1
; GFX6-NEXT: v_min_u32_e32 v2, v3, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v3, v1, s4
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 clamp
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: s_movk_i32 s4, 0xff
-; GFX10-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
-; GFX10-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
+; GFX10-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 clamp
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_add_u16 v0, v0, v1 clamp
;
; GFX8-LABEL: s_uaddsat_v2i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s4, 8, 0x100000
; GFX8-NEXT: s_lshr_b32 s3, s1, 8
-; GFX8-NEXT: s_lshl_b32 s1, s1, s4
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_lshr_b32 s2, s0, 8
-; GFX8-NEXT: s_lshl_b32 s0, s0, s4
+; GFX8-NEXT: s_lshl_b32 s0, s0, 8
; GFX8-NEXT: v_mov_b32_e32 v0, s1
-; GFX8-NEXT: s_lshl_b32 s1, s3, s4
+; GFX8-NEXT: s_lshl_b32 s1, s3, 8
; GFX8-NEXT: v_add_u16_e64 v0, s0, v0 clamp
-; GFX8-NEXT: s_lshl_b32 s0, s2, s4
+; GFX8-NEXT: s_lshl_b32 s0, s2, 8
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_add_u16_e64 v1, s0, v1 clamp
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 8, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
-; GFX6-NEXT: v_xor_b32_e32 v8, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v8, v0
; GFX6-NEXT: v_min_u32_e32 v1, v8, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v5
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v1
+; GFX6-NEXT: v_not_b32_e32 v5, v1
; GFX6-NEXT: v_min_u32_e32 v2, v5, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v6
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v5, v2
; GFX6-NEXT: v_min_u32_e32 v3, v5, v3
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 24, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v7
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v3
+; GFX6-NEXT: v_not_b32_e32 v5, v3
; GFX6-NEXT: v_min_u32_e32 v4, v5, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 24, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 24, v2
; GFX9-LABEL: v_uaddsat_v4i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; GFX9-NEXT: v_perm_b32 v2, v2, v0, s4
+; GFX9-NEXT: v_and_b32_e32 v6, 0xffff, v0
; GFX9-NEXT: v_alignbit_b32 v0, v3, v0, 16
-; GFX9-NEXT: v_perm_b32 v3, v4, v1, s4
+; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v1
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1
+; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v6
+; GFX9-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GFX9-NEXT: v_alignbit_b32 v1, v5, v1, 16
; GFX9-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
-; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1
-; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; GFX10-NEXT: v_perm_b32 v2, v2, v0, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
+; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v1
+; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX10-NEXT: v_alignbit_b32 v0, v3, v0, 16
-; GFX10-NEXT: v_perm_b32 v3, v4, v1, 0x5040100
-; GFX10-NEXT: v_alignbit_b32 v1, v5, v1, 16
+; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 24
-; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
+; GFX10-NEXT: v_lshl_or_b32 v3, v5, 16, v6
+; GFX10-NEXT: v_alignbit_b32 v1, v7, v1, 16
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
+; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_add_u16 v2, v2, v3 clamp
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v4, 24, v0
-; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; GFX11-NEXT: v_perm_b32 v2, v2, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v3, v3, v1, 0x5040100
-; GFX11-NEXT: v_alignbit_b32 v0, v4, v0, 16
-; GFX11-NEXT: v_alignbit_b32 v1, v5, v1, 16
+; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v1
+; GFX11-NEXT: v_lshrrev_b32_e32 v6, 24, v0
+; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
+; GFX11-NEXT: v_lshl_or_b32 v2, v2, 16, v4
+; GFX11-NEXT: v_lshl_or_b32 v3, v3, 16, v5
+; GFX11-NEXT: v_alignbit_b32 v0, v6, v0, 16
+; GFX11-NEXT: v_alignbit_b32 v1, v7, v1, 16
; GFX11-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
;
; GFX8-LABEL: s_uaddsat_v4i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s8, 8, 0x100000
; GFX8-NEXT: s_lshr_b32 s5, s1, 8
; GFX8-NEXT: s_lshr_b32 s6, s1, 16
; GFX8-NEXT: s_lshr_b32 s7, s1, 24
-; GFX8-NEXT: s_lshl_b32 s1, s1, s8
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_lshr_b32 s2, s0, 8
; GFX8-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s0, 24
-; GFX8-NEXT: s_lshl_b32 s0, s0, s8
+; GFX8-NEXT: s_lshl_b32 s0, s0, 8
; GFX8-NEXT: v_mov_b32_e32 v0, s1
-; GFX8-NEXT: s_lshl_b32 s1, s5, s8
+; GFX8-NEXT: s_lshl_b32 s1, s5, 8
; GFX8-NEXT: v_add_u16_e64 v0, s0, v0 clamp
-; GFX8-NEXT: s_lshl_b32 s0, s2, s8
+; GFX8-NEXT: s_lshl_b32 s0, s2, 8
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_add_u16_e64 v1, s0, v1 clamp
-; GFX8-NEXT: s_lshl_b32 s1, s6, s8
+; GFX8-NEXT: s_lshl_b32 s1, s6, 8
; GFX8-NEXT: v_mov_b32_e32 v4, 0xff
-; GFX8-NEXT: s_lshl_b32 s0, s3, s8
+; GFX8-NEXT: s_lshl_b32 s0, s3, 8
; GFX8-NEXT: v_mov_b32_e32 v2, s1
-; GFX8-NEXT: s_lshl_b32 s1, s7, s8
+; GFX8-NEXT: s_lshl_b32 s1, s7, 8
; GFX8-NEXT: v_and_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_add_u16_e64 v2, s0, v2 clamp
-; GFX8-NEXT: s_lshl_b32 s0, s4, s8
+; GFX8-NEXT: s_lshl_b32 s0, s4, 8
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v2, v0
; GFX6-NEXT: v_min_u32_e32 v1, v2, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 8, v0
; GFX6-LABEL: v_uaddsat_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v2, v0
; GFX6-NEXT: v_min_u32_e32 v1, v2, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
define amdgpu_ps float @uaddsat_i32_vs(i32 %lhs, i32 inreg %rhs) {
; GFX6-LABEL: uaddsat_i32_vs:
; GFX6: ; %bb.0:
-; GFX6-NEXT: v_xor_b32_e32 v1, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v1, v0
; GFX6-NEXT: v_min_u32_e32 v1, s0, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: ; return to shader part epilog
; GFX6-LABEL: v_uaddsat_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v4, v0
; GFX6-NEXT: v_min_u32_e32 v2, v4, v2
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v1
+; GFX6-NEXT: v_not_b32_e32 v2, v1
; GFX6-NEXT: v_min_u32_e32 v2, v2, v3
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX6-LABEL: v_uaddsat_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_xor_b32_e32 v6, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v6, v0
; GFX6-NEXT: v_min_u32_e32 v3, v6, v3
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v3
-; GFX6-NEXT: v_xor_b32_e32 v3, -1, v1
+; GFX6-NEXT: v_not_b32_e32 v3, v1
; GFX6-NEXT: v_min_u32_e32 v3, v3, v4
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3
-; GFX6-NEXT: v_xor_b32_e32 v3, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v3, v2
; GFX6-NEXT: v_min_u32_e32 v3, v3, v5
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX6-LABEL: v_uaddsat_v4i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_xor_b32_e32 v8, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v8, v0
; GFX6-NEXT: v_min_u32_e32 v4, v8, v4
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v4
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v1
+; GFX6-NEXT: v_not_b32_e32 v4, v1
; GFX6-NEXT: v_min_u32_e32 v4, v4, v5
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v4
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v4, v2
; GFX6-NEXT: v_min_u32_e32 v4, v4, v6
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v3
+; GFX6-NEXT: v_not_b32_e32 v4, v3
; GFX6-NEXT: v_min_u32_e32 v4, v4, v7
; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX6-LABEL: v_uaddsat_v5i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_xor_b32_e32 v10, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v10, v0
; GFX6-NEXT: v_min_u32_e32 v5, v10, v5
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v5
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v1
+; GFX6-NEXT: v_not_b32_e32 v5, v1
; GFX6-NEXT: v_min_u32_e32 v5, v5, v6
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v5
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v5, v2
; GFX6-NEXT: v_min_u32_e32 v5, v5, v7
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v3
+; GFX6-NEXT: v_not_b32_e32 v5, v3
; GFX6-NEXT: v_min_u32_e32 v5, v5, v8
; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v4
+; GFX6-NEXT: v_not_b32_e32 v5, v4
; GFX6-NEXT: v_min_u32_e32 v5, v5, v9
; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v5
; GFX6-NEXT: s_setpc_b64 s[30:31]
; GFX6-LABEL: v_uaddsat_v16i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_xor_b32_e32 v31, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v31, v0
; GFX6-NEXT: v_min_u32_e32 v16, v31, v16
; GFX6-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v1
+; GFX6-NEXT: v_not_b32_e32 v16, v1
; GFX6-NEXT: v_min_u32_e32 v16, v16, v17
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v16, v2
; GFX6-NEXT: v_min_u32_e32 v16, v16, v18
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v3
+; GFX6-NEXT: v_not_b32_e32 v16, v3
; GFX6-NEXT: v_min_u32_e32 v16, v16, v19
; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v4
+; GFX6-NEXT: v_not_b32_e32 v16, v4
; GFX6-NEXT: v_min_u32_e32 v16, v16, v20
; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v5
+; GFX6-NEXT: v_not_b32_e32 v16, v5
; GFX6-NEXT: v_min_u32_e32 v16, v16, v21
; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v6
+; GFX6-NEXT: v_not_b32_e32 v16, v6
; GFX6-NEXT: v_min_u32_e32 v16, v16, v22
; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v7
+; GFX6-NEXT: v_not_b32_e32 v16, v7
; GFX6-NEXT: v_min_u32_e32 v16, v16, v23
; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v8
+; GFX6-NEXT: v_not_b32_e32 v16, v8
; GFX6-NEXT: v_min_u32_e32 v16, v16, v24
; GFX6-NEXT: v_add_i32_e32 v8, vcc, v8, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v9
+; GFX6-NEXT: v_not_b32_e32 v16, v9
; GFX6-NEXT: v_min_u32_e32 v16, v16, v25
; GFX6-NEXT: v_add_i32_e32 v9, vcc, v9, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v10
+; GFX6-NEXT: v_not_b32_e32 v16, v10
; GFX6-NEXT: v_min_u32_e32 v16, v16, v26
; GFX6-NEXT: v_add_i32_e32 v10, vcc, v10, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v11
+; GFX6-NEXT: v_not_b32_e32 v16, v11
; GFX6-NEXT: v_min_u32_e32 v16, v16, v27
; GFX6-NEXT: v_add_i32_e32 v11, vcc, v11, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v12
+; GFX6-NEXT: v_not_b32_e32 v16, v12
; GFX6-NEXT: v_min_u32_e32 v16, v16, v28
; GFX6-NEXT: v_add_i32_e32 v12, vcc, v12, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v13
+; GFX6-NEXT: v_not_b32_e32 v16, v13
; GFX6-NEXT: v_min_u32_e32 v16, v16, v29
; GFX6-NEXT: v_add_i32_e32 v13, vcc, v13, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v14
+; GFX6-NEXT: v_not_b32_e32 v16, v14
; GFX6-NEXT: v_min_u32_e32 v16, v16, v30
; GFX6-NEXT: v_add_i32_e32 v14, vcc, v14, v16
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v15
+; GFX6-NEXT: v_not_b32_e32 v16, v15
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: v_min_u32_e32 v16, v16, v31
; GFX6-NEXT: v_add_i32_e32 v15, vcc, v15, v16
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v2, v0
; GFX6-NEXT: v_min_u32_e32 v1, v2, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6: ; %bb.0:
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: v_xor_b32_e32 v1, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v1, v0
; GFX6-NEXT: v_min_u32_e32 v1, s0, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
-; GFX6-NEXT: v_xor_b32_e32 v4, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v4, v0
; GFX6-NEXT: v_min_u32_e32 v2, v4, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3
-; GFX6-NEXT: v_xor_b32_e32 v3, -1, v1
+; GFX6-NEXT: v_not_b32_e32 v3, v1
; GFX6-NEXT: v_min_u32_e32 v2, v3, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u16_e64 v2, v0, v1 clamp
-; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v2i16:
;
; GFX8-LABEL: s_uaddsat_v2i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s3, s1, 16
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
-; GFX8-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-NEXT: s_lshr_b32 s3, s1, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s1
-; GFX8-NEXT: v_add_u16_e64 v1, s2, v1 clamp
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_add_u16_e64 v0, s0, v0 clamp
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_add_u16_sdwa v1, v2, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v2, s1
; GFX8-NEXT: v_add_u16_e64 v1, s0, v0 clamp
-; GFX8-NEXT: v_add_u16_sdwa v0, v2, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_add_u16_sdwa v0, v2, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: uaddsat_v2i16_sv:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v2, v0
; GFX6-NEXT: v_min_u32_e32 v2, s0, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: s_lshl_b32 s0, s1, 16
-; GFX6-NEXT: v_xor_b32_e32 v2, -1, v1
+; GFX6-NEXT: v_not_b32_e32 v2, v1
; GFX6-NEXT: v_min_u32_e32 v2, s0, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v2, s1
; GFX8-NEXT: v_add_u16_e64 v1, v0, s0 clamp
-; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: uaddsat_v2i16_vs:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
-; GFX6-NEXT: v_xor_b32_e32 v8, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v8, v0
; GFX6-NEXT: v_min_u32_e32 v4, v8, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v5
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v1
+; GFX6-NEXT: v_not_b32_e32 v5, v1
; GFX6-NEXT: v_min_u32_e32 v4, v5, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v6
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v5, v2
; GFX6-NEXT: v_min_u32_e32 v4, v5, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v7
-; GFX6-NEXT: v_xor_b32_e32 v5, -1, v3
+; GFX6-NEXT: v_not_b32_e32 v5, v3
; GFX6-NEXT: v_min_u32_e32 v4, v5, v4
; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u16_e64 v4, v0, v2 clamp
-; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_u16_e64 v2, v1, v3 clamp
-; GFX8-NEXT: v_add_u16_sdwa v1, v1, v3 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_add_u16_sdwa v1, v1, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v4i16:
;
; GFX8-LABEL: s_uaddsat_v4i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s6, s2, 16
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
+; GFX8-NEXT: s_lshr_b32 s6, s2, 16
+; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_lshr_b32 s7, s3, 16
; GFX8-NEXT: v_mov_b32_e32 v1, s6
-; GFX8-NEXT: s_lshr_b32 s5, s1, 16
+; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: v_mov_b32_e32 v0, s2
-; GFX8-NEXT: v_add_u16_e64 v1, s4, v1 clamp
+; GFX8-NEXT: v_add_u16_sdwa v1, v2, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_mov_b32_e32 v3, s7
-; GFX8-NEXT: v_mov_b32_e32 v4, 16
+; GFX8-NEXT: v_mov_b32_e32 v4, s5
; GFX8-NEXT: v_add_u16_e64 v0, s0, v0 clamp
-; GFX8-NEXT: v_mov_b32_e32 v2, s3
-; GFX8-NEXT: v_add_u16_e64 v3, s5, v3 clamp
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_add_u16_e64 v2, s1, v2 clamp
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_add_u16_sdwa v3, v4, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v3
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: v_readfirstlane_b32 s1, v1
; GFX8-NEXT: ; return to shader part epilog
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v6
-; GFX6-NEXT: v_xor_b32_e32 v12, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v12, v0
; GFX6-NEXT: v_min_u32_e32 v6, v12, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v7
-; GFX6-NEXT: v_xor_b32_e32 v7, -1, v1
+; GFX6-NEXT: v_not_b32_e32 v7, v1
; GFX6-NEXT: v_min_u32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v8
-; GFX6-NEXT: v_xor_b32_e32 v7, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v7, v2
; GFX6-NEXT: v_min_u32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v9
-; GFX6-NEXT: v_xor_b32_e32 v7, -1, v3
+; GFX6-NEXT: v_not_b32_e32 v7, v3
; GFX6-NEXT: v_min_u32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v10
-; GFX6-NEXT: v_xor_b32_e32 v7, -1, v4
+; GFX6-NEXT: v_not_b32_e32 v7, v4
; GFX6-NEXT: v_min_u32_e32 v6, v7, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v11
-; GFX6-NEXT: v_xor_b32_e32 v7, -1, v5
+; GFX6-NEXT: v_not_b32_e32 v7, v5
; GFX6-NEXT: v_min_u32_e32 v6, v7, v6
; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v6
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u16_e64 v6, v0, v3 clamp
-; GFX8-NEXT: v_add_u16_sdwa v0, v0, v3 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_add_u16_sdwa v0, v0, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_u16_e64 v3, v1, v4 clamp
-; GFX8-NEXT: v_add_u16_sdwa v1, v1, v4 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_add_u16_sdwa v1, v1, v4 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_u16_e64 v4, v2, v5 clamp
-; GFX8-NEXT: v_add_u16_sdwa v2, v2, v5 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v5, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_add_u16_sdwa v2, v2, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v6, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v6i16:
;
; GFX8-LABEL: s_uaddsat_v6i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s9, s3, 16
; GFX8-NEXT: s_lshr_b32 s6, s0, 16
-; GFX8-NEXT: s_lshr_b32 s10, s4, 16
-; GFX8-NEXT: v_mov_b32_e32 v1, s9
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
+; GFX8-NEXT: s_lshr_b32 s9, s3, 16
+; GFX8-NEXT: s_lshr_b32 s10, s4, 16
+; GFX8-NEXT: s_lshr_b32 s8, s2, 16
; GFX8-NEXT: s_lshr_b32 s11, s5, 16
-; GFX8-NEXT: v_mov_b32_e32 v0, s3
-; GFX8-NEXT: v_add_u16_e64 v1, s6, v1 clamp
+; GFX8-NEXT: v_mov_b32_e32 v1, s9
+; GFX8-NEXT: v_mov_b32_e32 v2, s6
; GFX8-NEXT: v_mov_b32_e32 v3, s10
-; GFX8-NEXT: v_mov_b32_e32 v6, 16
-; GFX8-NEXT: s_lshr_b32 s8, s2, 16
-; GFX8-NEXT: v_add_u16_e64 v0, s0, v0 clamp
+; GFX8-NEXT: v_mov_b32_e32 v4, s7
+; GFX8-NEXT: v_mov_b32_e32 v0, s3
+; GFX8-NEXT: v_add_u16_sdwa v1, v2, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: v_add_u16_e64 v3, s7, v3 clamp
+; GFX8-NEXT: v_add_u16_sdwa v3, v4, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v4, s5
; GFX8-NEXT: v_mov_b32_e32 v5, s11
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_mov_b32_e32 v6, s8
+; GFX8-NEXT: v_add_u16_e64 v0, s0, v0 clamp
; GFX8-NEXT: v_add_u16_e64 v2, s1, v2 clamp
-; GFX8-NEXT: v_mov_b32_e32 v4, s5
-; GFX8-NEXT: v_add_u16_e64 v5, s8, v5 clamp
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_add_u16_e64 v4, s2, v4 clamp
-; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v6, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_add_u16_sdwa v5, v6, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v3
+; GFX8-NEXT: v_or_b32_e32 v2, v4, v5
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: v_readfirstlane_b32 s1, v1
; GFX8-NEXT: v_readfirstlane_b32 s2, v2
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v8
-; GFX6-NEXT: v_xor_b32_e32 v16, -1, v0
+; GFX6-NEXT: v_not_b32_e32 v16, v0
; GFX6-NEXT: v_min_u32_e32 v8, v16, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v9
-; GFX6-NEXT: v_xor_b32_e32 v9, -1, v1
+; GFX6-NEXT: v_not_b32_e32 v9, v1
; GFX6-NEXT: v_min_u32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v10
-; GFX6-NEXT: v_xor_b32_e32 v9, -1, v2
+; GFX6-NEXT: v_not_b32_e32 v9, v2
; GFX6-NEXT: v_min_u32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v11
-; GFX6-NEXT: v_xor_b32_e32 v9, -1, v3
+; GFX6-NEXT: v_not_b32_e32 v9, v3
; GFX6-NEXT: v_min_u32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v12
-; GFX6-NEXT: v_xor_b32_e32 v9, -1, v4
+; GFX6-NEXT: v_not_b32_e32 v9, v4
; GFX6-NEXT: v_min_u32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v13
-; GFX6-NEXT: v_xor_b32_e32 v9, -1, v5
+; GFX6-NEXT: v_not_b32_e32 v9, v5
; GFX6-NEXT: v_min_u32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v14
-; GFX6-NEXT: v_xor_b32_e32 v9, -1, v6
+; GFX6-NEXT: v_not_b32_e32 v9, v6
; GFX6-NEXT: v_min_u32_e32 v8, v9, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8
; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v15
-; GFX6-NEXT: v_xor_b32_e32 v9, -1, v7
+; GFX6-NEXT: v_not_b32_e32 v9, v7
; GFX6-NEXT: v_min_u32_e32 v8, v9, v8
; GFX6-NEXT: v_add_i32_e32 v7, vcc, v7, v8
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u16_e64 v8, v0, v4 clamp
-; GFX8-NEXT: v_add_u16_sdwa v0, v0, v4 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_add_u16_sdwa v0, v0, v4 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_u16_e64 v4, v1, v5 clamp
-; GFX8-NEXT: v_add_u16_sdwa v1, v1, v5 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_add_u16_sdwa v1, v1, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_u16_e64 v5, v2, v6 clamp
-; GFX8-NEXT: v_add_u16_sdwa v2, v2, v6 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_add_u16_sdwa v2, v2, v6 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_u16_e64 v6, v3, v7 clamp
-; GFX8-NEXT: v_add_u16_sdwa v3, v3, v7 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v7, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_mov_b32_e32 v7, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_add_u16_sdwa v3, v3, v7 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
+; GFX8-NEXT: v_or_b32_e32 v2, v5, v2
+; GFX8-NEXT: v_or_b32_e32 v3, v6, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v8i16:
;
; GFX8-LABEL: s_uaddsat_v8i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s12, s4, 16
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
-; GFX8-NEXT: s_lshr_b32 s13, s5, 16
-; GFX8-NEXT: v_mov_b32_e32 v1, s12
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
+; GFX8-NEXT: s_lshr_b32 s10, s2, 16
+; GFX8-NEXT: s_lshr_b32 s12, s4, 16
+; GFX8-NEXT: s_lshr_b32 s13, s5, 16
; GFX8-NEXT: s_lshr_b32 s14, s6, 16
+; GFX8-NEXT: s_lshr_b32 s11, s3, 16
; GFX8-NEXT: s_lshr_b32 s15, s7, 16
-; GFX8-NEXT: v_mov_b32_e32 v0, s4
-; GFX8-NEXT: v_add_u16_e64 v1, s8, v1 clamp
+; GFX8-NEXT: v_mov_b32_e32 v1, s12
+; GFX8-NEXT: v_mov_b32_e32 v2, s8
; GFX8-NEXT: v_mov_b32_e32 v3, s13
-; GFX8-NEXT: v_mov_b32_e32 v8, 16
-; GFX8-NEXT: s_lshr_b32 s10, s2, 16
-; GFX8-NEXT: s_lshr_b32 s11, s3, 16
-; GFX8-NEXT: v_add_u16_e64 v0, s0, v0 clamp
-; GFX8-NEXT: v_mov_b32_e32 v2, s5
-; GFX8-NEXT: v_add_u16_e64 v3, s9, v3 clamp
+; GFX8-NEXT: v_mov_b32_e32 v4, s9
; GFX8-NEXT: v_mov_b32_e32 v5, s14
-; GFX8-NEXT: v_mov_b32_e32 v7, s15
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_add_u16_e64 v2, s1, v2 clamp
+; GFX8-NEXT: v_mov_b32_e32 v6, s10
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: v_add_u16_sdwa v1, v2, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v2, s5
+; GFX8-NEXT: v_add_u16_sdwa v3, v4, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v4, s6
-; GFX8-NEXT: v_add_u16_e64 v5, s10, v5 clamp
+; GFX8-NEXT: v_add_u16_sdwa v5, v6, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v6, s7
-; GFX8-NEXT: v_add_u16_e64 v7, s11, v7 clamp
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_mov_b32_e32 v7, s15
+; GFX8-NEXT: v_mov_b32_e32 v8, s11
+; GFX8-NEXT: v_add_u16_e64 v0, s0, v0 clamp
+; GFX8-NEXT: v_add_u16_e64 v2, s1, v2 clamp
; GFX8-NEXT: v_add_u16_e64 v4, s2, v4 clamp
; GFX8-NEXT: v_add_u16_e64 v6, s3, v6 clamp
-; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v8, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_add_u16_sdwa v7, v8, v7 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v3
+; GFX8-NEXT: v_or_b32_e32 v2, v4, v5
+; GFX8-NEXT: v_or_b32_e32 v3, v6, v7
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: v_readfirstlane_b32 s1, v1
; GFX8-NEXT: v_readfirstlane_b32 s2, v2
; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s2, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
; GFX9-NEXT: v_subrev_u32_e32 v4, s2, v3
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX9-NEXT: s_mov_b32 s0, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v1, v0, s0
-; GFX9-NEXT: v_perm_b32 v1, v3, v2, s0
+; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_dword v2, v0, s[4:5]
; GFX9-NEXT: global_store_dword v2, v1, s[6:7]
; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s2
; GFX10-NEXT: v_cvt_f32_u32_e32 v1, s1
; GFX10-NEXT: s_sub_i32 s3, 0, s2
-; GFX10-NEXT: s_sub_i32 s6, 0, s1
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1
; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX10-NEXT: v_mul_lo_u32 v2, s3, v0
-; GFX10-NEXT: v_mul_lo_u32 v3, s6, v1
+; GFX10-NEXT: s_sub_i32 s3, 0, s1
+; GFX10-NEXT: v_mul_lo_u32 v3, s3, v1
; GFX10-NEXT: s_and_b32 s3, s0, 0xffff
; GFX10-NEXT: s_lshr_b32 s0, s0, 16
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2
; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2
; GFX10-NEXT: v_mul_hi_u32 v0, s3, v0
; GFX10-NEXT: v_mul_hi_u32 v1, s0, v1
; GFX10-NEXT: v_mul_lo_u32 v2, v0, s2
-; GFX10-NEXT: v_mul_lo_u32 v3, v1, s1
; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0
-; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v1
+; GFX10-NEXT: v_mul_lo_u32 v3, v1, s1
+; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v1
; GFX10-NEXT: v_sub_nc_u32_e32 v2, s3, v2
; GFX10-NEXT: v_sub_nc_u32_e32 v3, s0, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s2, v2
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v3
-; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v2
-; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s1, v3
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX10-NEXT: v_subrev_nc_u32_e32 v5, s2, v2
+; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT: v_cmp_le_u32_e64 s0, s1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
+; GFX10-NEXT: v_subrev_nc_u32_e32 v4, s1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v6, s0
+; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v4, s0
+; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT: v_subrev_nc_u32_e32 v4, s2, v2
+; GFX10-NEXT: v_cmp_le_u32_e64 s0, s1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo
; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v1
-; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0
-; GFX10-NEXT: v_cmp_le_u32_e64 s0, s2, v2
-; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v3
-; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s2, v2
-; GFX10-NEXT: v_subrev_nc_u32_e32 v7, s1, v3
-; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
-; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v2, v2, v6, s0
-; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX10-NEXT: v_subrev_nc_u32_e32 v6, s1, v3
+; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc_lo
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v3, v3, v6, s0
+; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_store_dword v1, v0, s[4:5]
; GFX10-NEXT: global_store_dword v1, v2, s[6:7]
;
; GFX8-LABEL: s_usubsat_i7:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s2, 9, 0x100000
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
+; GFX8-NEXT: s_lshl_b32 s1, s1, 9
+; GFX8-NEXT: s_lshl_b32 s0, s0, 9
; GFX8-NEXT: v_mov_b32_e32 v0, s1
; GFX8-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
; GFX8-NEXT: v_lshrrev_b16_e32 v0, 9, v0
;
; GFX9-LABEL: s_usubsat_i7:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s2, 9, 0x100000
-; GFX9-NEXT: s_lshl_b32 s1, s1, s2
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
+; GFX9-NEXT: s_lshl_b32 s1, s1, 9
+; GFX9-NEXT: s_lshl_b32 s0, s0, 9
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
; GFX9-NEXT: v_lshrrev_b16_e32 v0, 9, v0
;
; GFX10PLUS-LABEL: s_usubsat_i7:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s2, 9, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s2
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 9
+; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 9
; GFX10PLUS-NEXT: v_sub_nc_u16 v0, s0, s1 clamp
; GFX10PLUS-NEXT: v_lshrrev_b16 v0, 9, v0
; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v0
;
; GFX8-LABEL: s_usubsat_i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000
-; GFX8-NEXT: s_lshl_b32 s1, s1, s2
-; GFX8-NEXT: s_lshl_b32 s0, s0, s2
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
+; GFX8-NEXT: s_lshl_b32 s0, s0, 8
; GFX8-NEXT: v_mov_b32_e32 v0, s1
; GFX8-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
; GFX8-NEXT: v_lshrrev_b16_e32 v0, 8, v0
;
; GFX9-LABEL: s_usubsat_i8:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_bfe_u32 s2, 8, 0x100000
-; GFX9-NEXT: s_lshl_b32 s1, s1, s2
-; GFX9-NEXT: s_lshl_b32 s0, s0, s2
+; GFX9-NEXT: s_lshl_b32 s1, s1, 8
+; GFX9-NEXT: s_lshl_b32 s0, s0, 8
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
; GFX9-NEXT: v_lshrrev_b16_e32 v0, 8, v0
;
; GFX10PLUS-LABEL: s_usubsat_i8:
; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_bfe_u32 s2, 8, 0x100000
-; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s2
-; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s2
+; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 8
+; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 8
; GFX10PLUS-NEXT: v_sub_nc_u16 v0, s0, s1 clamp
; GFX10PLUS-NEXT: v_lshrrev_b16 v0, 8, v0
; GFX10PLUS-NEXT: v_readfirstlane_b32 s0, v0
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v2, v0, s4
-; GFX9-NEXT: v_perm_b32 v1, v3, v1, s4
+; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_sub_u16 v0, v0, v1 clamp
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1
+; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT: s_movk_i32 s4, 0xff
-; GFX10-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
-; GFX10-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
+; GFX10-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX10-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_sub_u16 v0, v0, v1 clamp
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX11-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_sub_u16 v0, v0, v1 clamp
;
; GFX8-LABEL: s_usubsat_v2i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s4, 8, 0x100000
; GFX8-NEXT: s_lshr_b32 s3, s1, 8
-; GFX8-NEXT: s_lshl_b32 s1, s1, s4
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_lshr_b32 s2, s0, 8
-; GFX8-NEXT: s_lshl_b32 s0, s0, s4
+; GFX8-NEXT: s_lshl_b32 s0, s0, 8
; GFX8-NEXT: v_mov_b32_e32 v0, s1
-; GFX8-NEXT: s_lshl_b32 s1, s3, s4
+; GFX8-NEXT: s_lshl_b32 s1, s3, 8
; GFX8-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
-; GFX8-NEXT: s_lshl_b32 s0, s2, s4
+; GFX8-NEXT: s_lshl_b32 s0, s2, 8
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_sub_u16_e64 v1, s0, v1 clamp
; GFX8-NEXT: v_lshrrev_b16_e32 v1, 8, v1
; GFX9-LABEL: v_usubsat_v4i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
+; GFX9-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; GFX9-NEXT: v_perm_b32 v2, v2, v0, s4
+; GFX9-NEXT: v_and_b32_e32 v6, 0xffff, v0
; GFX9-NEXT: v_alignbit_b32 v0, v3, v0, 16
-; GFX9-NEXT: v_perm_b32 v3, v4, v1, s4
+; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v1
+; GFX9-NEXT: v_lshrrev_b32_e32 v5, 24, v1
+; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v6
+; GFX9-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GFX9-NEXT: v_alignbit_b32 v1, v5, v1, 16
; GFX9-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX9-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 24, v0
-; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1
-; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; GFX10-NEXT: v_perm_b32 v2, v2, v0, 0x5040100
+; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
+; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v1
+; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
; GFX10-NEXT: v_alignbit_b32 v0, v3, v0, 16
-; GFX10-NEXT: v_perm_b32 v3, v4, v1, 0x5040100
-; GFX10-NEXT: v_alignbit_b32 v1, v5, v1, 16
+; GFX10-NEXT: v_lshl_or_b32 v2, v2, 16, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 24
-; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
+; GFX10-NEXT: v_lshl_or_b32 v3, v5, 16, v6
+; GFX10-NEXT: v_alignbit_b32 v1, v7, v1, 16
; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
+; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1]
; GFX10-NEXT: v_pk_sub_u16 v2, v2, v3 clamp
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 8, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1
-; GFX11-NEXT: v_lshrrev_b32_e32 v4, 24, v0
-; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; GFX11-NEXT: v_perm_b32 v2, v2, v0, 0x5040100
-; GFX11-NEXT: v_perm_b32 v3, v3, v1, 0x5040100
-; GFX11-NEXT: v_alignbit_b32 v0, v4, v0, 16
-; GFX11-NEXT: v_alignbit_b32 v1, v5, v1, 16
+; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v1
+; GFX11-NEXT: v_lshrrev_b32_e32 v6, 24, v0
+; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1
+; GFX11-NEXT: v_lshl_or_b32 v2, v2, 16, v4
+; GFX11-NEXT: v_lshl_or_b32 v3, v3, 16, v5
+; GFX11-NEXT: v_alignbit_b32 v0, v6, v0, 16
+; GFX11-NEXT: v_alignbit_b32 v1, v7, v1, 16
; GFX11-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1]
; GFX11-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1]
;
; GFX8-LABEL: s_usubsat_v4i8:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_bfe_u32 s8, 8, 0x100000
; GFX8-NEXT: s_lshr_b32 s5, s1, 8
; GFX8-NEXT: s_lshr_b32 s6, s1, 16
; GFX8-NEXT: s_lshr_b32 s7, s1, 24
-; GFX8-NEXT: s_lshl_b32 s1, s1, s8
+; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_lshr_b32 s2, s0, 8
; GFX8-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-NEXT: s_lshr_b32 s4, s0, 24
-; GFX8-NEXT: s_lshl_b32 s0, s0, s8
+; GFX8-NEXT: s_lshl_b32 s0, s0, 8
; GFX8-NEXT: v_mov_b32_e32 v0, s1
-; GFX8-NEXT: s_lshl_b32 s1, s5, s8
+; GFX8-NEXT: s_lshl_b32 s1, s5, 8
; GFX8-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
-; GFX8-NEXT: s_lshl_b32 s0, s2, s8
+; GFX8-NEXT: s_lshl_b32 s0, s2, 8
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_sub_u16_e64 v1, s0, v1 clamp
-; GFX8-NEXT: s_lshl_b32 s1, s6, s8
+; GFX8-NEXT: s_lshl_b32 s1, s6, 8
; GFX8-NEXT: v_mov_b32_e32 v4, 0xff
-; GFX8-NEXT: s_lshl_b32 s0, s3, s8
+; GFX8-NEXT: s_lshl_b32 s0, s3, 8
; GFX8-NEXT: v_mov_b32_e32 v2, s1
-; GFX8-NEXT: s_lshl_b32 s1, s7, s8
+; GFX8-NEXT: s_lshl_b32 s1, s7, 8
; GFX8-NEXT: v_and_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_sub_u16_e64 v2, s0, v2 clamp
-; GFX8-NEXT: s_lshl_b32 s0, s4, s8
+; GFX8-NEXT: s_lshl_b32 s0, s4, 8
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_and_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_u16_e64 v2, v0, v1 clamp
-; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v2i16:
;
; GFX8-LABEL: s_usubsat_v2i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s3, s1, 16
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
-; GFX8-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-NEXT: s_lshr_b32 s3, s1, 16
; GFX8-NEXT: v_mov_b32_e32 v0, s1
-; GFX8-NEXT: v_sub_u16_e64 v1, s2, v1 clamp
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v1, v2, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v2, s1
; GFX8-NEXT: v_sub_u16_e64 v1, s0, v0 clamp
-; GFX8-NEXT: v_sub_u16_sdwa v0, v2, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v0, v2, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: usubsat_v2i16_sv:
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v2, s1
; GFX8-NEXT: v_sub_u16_e64 v1, v0, s0 clamp
-; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v2 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: usubsat_v2i16_vs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_u16_e64 v4, v0, v2 clamp
-; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v2 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e64 v2, v1, v3 clamp
-; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v3 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v4i16:
;
; GFX8-LABEL: s_usubsat_v4i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s6, s2, 16
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
+; GFX8-NEXT: s_lshr_b32 s6, s2, 16
+; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_lshr_b32 s7, s3, 16
; GFX8-NEXT: v_mov_b32_e32 v1, s6
-; GFX8-NEXT: s_lshr_b32 s5, s1, 16
+; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: v_mov_b32_e32 v0, s2
-; GFX8-NEXT: v_sub_u16_e64 v1, s4, v1 clamp
+; GFX8-NEXT: v_sub_u16_sdwa v1, v2, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v2, s3
; GFX8-NEXT: v_mov_b32_e32 v3, s7
-; GFX8-NEXT: v_mov_b32_e32 v4, 16
+; GFX8-NEXT: v_mov_b32_e32 v4, s5
; GFX8-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
-; GFX8-NEXT: v_mov_b32_e32 v2, s3
-; GFX8-NEXT: v_sub_u16_e64 v3, s5, v3 clamp
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_sub_u16_e64 v2, s1, v2 clamp
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v3, v4, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v3
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: v_readfirstlane_b32 s1, v1
; GFX8-NEXT: ; return to shader part epilog
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_u16_e64 v6, v0, v3 clamp
-; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v3 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e64 v3, v1, v4 clamp
-; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v4 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v4 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e64 v4, v2, v5 clamp
-; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v5 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v5, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v6, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v6i16:
;
; GFX8-LABEL: s_usubsat_v6i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s9, s3, 16
; GFX8-NEXT: s_lshr_b32 s6, s0, 16
-; GFX8-NEXT: s_lshr_b32 s10, s4, 16
-; GFX8-NEXT: v_mov_b32_e32 v1, s9
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
+; GFX8-NEXT: s_lshr_b32 s9, s3, 16
+; GFX8-NEXT: s_lshr_b32 s10, s4, 16
+; GFX8-NEXT: s_lshr_b32 s8, s2, 16
; GFX8-NEXT: s_lshr_b32 s11, s5, 16
-; GFX8-NEXT: v_mov_b32_e32 v0, s3
-; GFX8-NEXT: v_sub_u16_e64 v1, s6, v1 clamp
+; GFX8-NEXT: v_mov_b32_e32 v1, s9
+; GFX8-NEXT: v_mov_b32_e32 v2, s6
; GFX8-NEXT: v_mov_b32_e32 v3, s10
-; GFX8-NEXT: v_mov_b32_e32 v6, 16
-; GFX8-NEXT: s_lshr_b32 s8, s2, 16
-; GFX8-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
+; GFX8-NEXT: v_mov_b32_e32 v4, s7
+; GFX8-NEXT: v_mov_b32_e32 v0, s3
+; GFX8-NEXT: v_sub_u16_sdwa v1, v2, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v2, s4
-; GFX8-NEXT: v_sub_u16_e64 v3, s7, v3 clamp
+; GFX8-NEXT: v_sub_u16_sdwa v3, v4, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v4, s5
; GFX8-NEXT: v_mov_b32_e32 v5, s11
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_mov_b32_e32 v6, s8
+; GFX8-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
; GFX8-NEXT: v_sub_u16_e64 v2, s1, v2 clamp
-; GFX8-NEXT: v_mov_b32_e32 v4, s5
-; GFX8-NEXT: v_sub_u16_e64 v5, s8, v5 clamp
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT: v_sub_u16_e64 v4, s2, v4 clamp
-; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v6, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v5, v6, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v3
+; GFX8-NEXT: v_or_b32_e32 v2, v4, v5
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: v_readfirstlane_b32 s1, v1
; GFX8-NEXT: v_readfirstlane_b32 s2, v2
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_u16_e64 v8, v0, v4 clamp
-; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v4 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v4 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e64 v4, v1, v5 clamp
-; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v5 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e64 v5, v2, v6 clamp
-; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v6 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v6 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e64 v6, v3, v7 clamp
-; GFX8-NEXT: v_sub_u16_sdwa v3, v3, v7 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v7, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_mov_b32_e32 v7, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v3, v3, v7 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
+; GFX8-NEXT: v_or_b32_e32 v2, v5, v2
+; GFX8-NEXT: v_or_b32_e32 v3, v6, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v8i16:
;
; GFX8-LABEL: s_usubsat_v8i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_lshr_b32 s12, s4, 16
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
-; GFX8-NEXT: s_lshr_b32 s13, s5, 16
-; GFX8-NEXT: v_mov_b32_e32 v1, s12
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
+; GFX8-NEXT: s_lshr_b32 s10, s2, 16
+; GFX8-NEXT: s_lshr_b32 s12, s4, 16
+; GFX8-NEXT: s_lshr_b32 s13, s5, 16
; GFX8-NEXT: s_lshr_b32 s14, s6, 16
+; GFX8-NEXT: s_lshr_b32 s11, s3, 16
; GFX8-NEXT: s_lshr_b32 s15, s7, 16
-; GFX8-NEXT: v_mov_b32_e32 v0, s4
-; GFX8-NEXT: v_sub_u16_e64 v1, s8, v1 clamp
+; GFX8-NEXT: v_mov_b32_e32 v1, s12
+; GFX8-NEXT: v_mov_b32_e32 v2, s8
; GFX8-NEXT: v_mov_b32_e32 v3, s13
-; GFX8-NEXT: v_mov_b32_e32 v8, 16
-; GFX8-NEXT: s_lshr_b32 s10, s2, 16
-; GFX8-NEXT: s_lshr_b32 s11, s3, 16
-; GFX8-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
-; GFX8-NEXT: v_mov_b32_e32 v2, s5
-; GFX8-NEXT: v_sub_u16_e64 v3, s9, v3 clamp
+; GFX8-NEXT: v_mov_b32_e32 v4, s9
; GFX8-NEXT: v_mov_b32_e32 v5, s14
-; GFX8-NEXT: v_mov_b32_e32 v7, s15
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_sub_u16_e64 v2, s1, v2 clamp
+; GFX8-NEXT: v_mov_b32_e32 v6, s10
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: v_sub_u16_sdwa v1, v2, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_mov_b32_e32 v2, s5
+; GFX8-NEXT: v_sub_u16_sdwa v3, v4, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v4, s6
-; GFX8-NEXT: v_sub_u16_e64 v5, s10, v5 clamp
+; GFX8-NEXT: v_sub_u16_sdwa v5, v6, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v6, s7
-; GFX8-NEXT: v_sub_u16_e64 v7, s11, v7 clamp
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GFX8-NEXT: v_mov_b32_e32 v7, s15
+; GFX8-NEXT: v_mov_b32_e32 v8, s11
+; GFX8-NEXT: v_sub_u16_e64 v0, s0, v0 clamp
+; GFX8-NEXT: v_sub_u16_e64 v2, s1, v2 clamp
; GFX8-NEXT: v_sub_u16_e64 v4, s2, v4 clamp
; GFX8-NEXT: v_sub_u16_e64 v6, s3, v6 clamp
-; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v8, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_sub_u16_sdwa v7, v8, v7 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_or_b32_e32 v1, v2, v3
+; GFX8-NEXT: v_or_b32_e32 v2, v4, v5
+; GFX8-NEXT: v_or_b32_e32 v3, v6, v7
; GFX8-NEXT: v_readfirstlane_b32 s0, v0
; GFX8-NEXT: v_readfirstlane_b32 s1, v1
; GFX8-NEXT: v_readfirstlane_b32 s2, v2
}
define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
-; GFX7-LABEL: vector_xnor_i32_one_use:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: vector_xnor_i32_one_use:
-; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: vector_xnor_i32_one_use:
-; GFX900: ; %bb.0: ; %entry
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
-; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX906-LABEL: vector_xnor_i32_one_use:
-; GFX906: ; %bb.0: ; %entry
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: vector_xnor_i32_one_use:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
+; GCN-NEXT: v_not_b32_e32 v0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xnor_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_xor3_b32 v0, v0, v1, -1
+; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX10-NEXT: v_not_b32_e32 v0, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%xor = xor i32 %a, %b
}
define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) {
-; GFX7-LABEL: vector_xnor_i64_one_use:
-; GFX7: ; %bb.0: ; %entry
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_xor_b32_e32 v0, v0, v2
-; GFX7-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: vector_xnor_i64_one_use:
-; GFX8: ; %bb.0: ; %entry
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_xor_b32_e32 v0, v0, v2
-; GFX8-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX900-LABEL: vector_xnor_i64_one_use:
-; GFX900: ; %bb.0: ; %entry
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_xor_b32_e32 v0, v0, v2
-; GFX900-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX900-NEXT: v_xor_b32_e32 v1, -1, v1
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX906-LABEL: vector_xnor_i64_one_use:
-; GFX906: ; %bb.0: ; %entry
-; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v2
-; GFX906-NEXT: v_xnor_b32_e32 v1, v1, v3
-; GFX906-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: vector_xnor_i64_one_use:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_xor_b32_e32 v0, v0, v2
+; GCN-NEXT: v_xor_b32_e32 v1, v1, v3
+; GCN-NEXT: v_not_b32_e32 v0, v0
+; GCN-NEXT: v_not_b32_e32 v1, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xnor_i64_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: v_xor3_b32 v0, v0, v2, -1
-; GFX10-NEXT: v_xor3_b32 v1, v1, v3, -1
+; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX10-NEXT: v_not_b32_e32 v0, v0
+; GFX10-NEXT: v_not_b32_e32 v1, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%xor = xor i64 %a, %b
}
define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
-; GFX7-LABEL: xnor_s_v_i32_one_use:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX7-NEXT: ; return to shader part epilog
-;
-; GFX8-LABEL: xnor_s_v_i32_one_use:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX8-NEXT: ; return to shader part epilog
-;
-; GFX900-LABEL: xnor_s_v_i32_one_use:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX900-NEXT: ; return to shader part epilog
-;
-; GFX906-LABEL: xnor_s_v_i32_one_use:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
-; GFX906-NEXT: ; return to shader part epilog
+; GCN-LABEL: xnor_s_v_i32_one_use:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
+; GCN-NEXT: v_not_b32_e32 v0, v0
+; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_s_v_i32_one_use:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_xor3_b32 v0, s0, v0, -1
+; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX10-NEXT: v_not_b32_e32 v0, v0
; GFX10-NEXT: ; return to shader part epilog
%xor = xor i32 %s, %v
%d = xor i32 %xor, -1
}
define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) {
-; GFX7-LABEL: xnor_v_s_i32_one_use:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX7-NEXT: ; return to shader part epilog
-;
-; GFX8-LABEL: xnor_v_s_i32_one_use:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX8-NEXT: ; return to shader part epilog
-;
-; GFX900-LABEL: xnor_v_s_i32_one_use:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX900-NEXT: ; return to shader part epilog
-;
-; GFX906-LABEL: xnor_v_s_i32_one_use:
-; GFX906: ; %bb.0:
-; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
-; GFX906-NEXT: ; return to shader part epilog
+; GCN-LABEL: xnor_v_s_i32_one_use:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
+; GCN-NEXT: v_not_b32_e32 v0, v0
+; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_v_s_i32_one_use:
; GFX10: ; %bb.0:
-; GFX10-NEXT: v_xor3_b32 v0, v0, s0, -1
+; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX10-NEXT: v_not_b32_e32 v0, v0
; GFX10-NEXT: ; return to shader part epilog
%xor = xor i32 %v, %s
%d = xor i32 %xor, -1
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 29
; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX7-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: xnor_i64_s_v_one_use:
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: xnor_i64_s_v_one_use:
; GFX900-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX900-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX900-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: v_not_b32_e32 v1, v1
; GFX900-NEXT: ; return to shader part epilog
;
; GFX906-LABEL: xnor_i64_s_v_one_use:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
-; GFX906-NEXT: v_xnor_b32_e32 v1, s1, v1
+; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
+; GFX906-NEXT: v_not_b32_e32 v0, v0
+; GFX906-NEXT: v_not_b32_e32 v1, v1
; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_i64_s_v_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX10-NEXT: v_xor3_b32 v0, s0, v0, -1
-; GFX10-NEXT: v_xor3_b32 v1, s1, v1, -1
+; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
+; GFX10-NEXT: v_not_b32_e32 v0, v0
+; GFX10-NEXT: v_not_b32_e32 v1, v1
; GFX10-NEXT: ; return to shader part epilog
entry:
%b = shl i64 %b64, 29
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 29
; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX7-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: xnor_i64_v_s_one_use:
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: xnor_i64_v_s_one_use:
; GFX900-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX900-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
-; GFX900-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: v_not_b32_e32 v1, v1
; GFX900-NEXT: ; return to shader part epilog
;
; GFX906-LABEL: xnor_i64_v_s_one_use:
; GFX906: ; %bb.0:
; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
-; GFX906-NEXT: v_xnor_b32_e64 v1, v1, s1
+; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
+; GFX906-NEXT: v_not_b32_e32 v0, v0
+; GFX906-NEXT: v_not_b32_e32 v1, v1
; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_i64_v_s_one_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX10-NEXT: v_xor3_b32 v0, v0, s0, -1
-; GFX10-NEXT: v_xor3_b32 v1, v1, s1, -1
+; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
+; GFX10-NEXT: v_not_b32_e32 v0, v0
+; GFX10-NEXT: v_not_b32_e32 v1, v1
; GFX10-NEXT: ; return to shader part epilog
%b = shl i64 %b64, 29
%xor = xor i64 %b, %a
; GFX7-LABEL: vector_xor_na_b_i32_one_use:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: vector_xor_na_b_i32_one_use:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: vector_xor_na_b_i32_one_use:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
+; GFX900-NEXT: v_not_b32_e32 v0, v0
; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: vector_xor_a_nb_i32_one_use:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX7-NEXT: v_not_b32_e32 v1, v1
; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: vector_xor_a_nb_i32_one_use:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX8-NEXT: v_not_b32_e32 v1, v1
; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: vector_xor_a_nb_i32_one_use:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_xor_b32_e32 v1, -1, v1
+; GFX900-NEXT: v_not_b32_e32 v1, v1
; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_v4i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: v_and_b32_e32 v4, 31, v4
+; GISEL-NEXT: v_and_b32_e32 v5, 31, v5
+; GISEL-NEXT: v_and_b32_e32 v6, 31, v6
+; GISEL-NEXT: v_and_b32_e32 v7, 31, v7
; GISEL-NEXT: v_lshlrev_b32_e32 v8, v4, v0
; GISEL-NEXT: v_lshlrev_b32_e32 v9, v5, v1
; GISEL-NEXT: v_lshlrev_b32_e32 v10, v6, v2
;
; GFX10GISEL-LABEL: sample_d_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v5, 16, v4
; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL-LABEL: sample_d_3d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7
+; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
-; GFX10GISEL-NEXT: v_perm_b32 v6, v9, v6, 0x5040100
+; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v9, 16, v6
; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_d_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
+; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v6, 16, v5
; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_cl_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL-LABEL: sample_d_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v5
+; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX10GISEL-NEXT: v_mov_b32_e32 v5, v6
-; GFX10GISEL-NEXT: v_perm_b32 v4, v7, v4, 0x5040100
+; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v7, 16, v4
; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_d_cl_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL-LABEL: sample_c_d_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v6
+; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX10GISEL-NEXT: v_mov_b32_e32 v6, v7
-; GFX10GISEL-NEXT: v_perm_b32 v5, v8, v5, 0x5040100
+; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v8, 16, v5
; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_cd_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v5, 16, v4
; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_cd_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
+; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v6, 16, v5
; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_cd_cl_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL-LABEL: sample_cd_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v5
+; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX10GISEL-NEXT: v_mov_b32_e32 v5, v6
-; GFX10GISEL-NEXT: v_perm_b32 v4, v7, v4, 0x5040100
+; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v7, 16, v4
; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_cd_cl_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL-LABEL: sample_c_cd_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v6
+; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX10GISEL-NEXT: v_mov_b32_e32 v6, v7
-; GFX10GISEL-NEXT: v_perm_b32 v5, v8, v5, 0x5040100
+; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v8, 16, v5
; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7
+; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
-; GFX10GISEL-NEXT: v_perm_b32 v6, v9, v6, 0x5040100
+; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v9, 16, v6
; GFX10GISEL-NEXT: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7
+; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
-; GFX10GISEL-NEXT: v_perm_b32 v6, v9, v6, 0x5040100
+; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v9, 16, v6
; GFX10GISEL-NEXT: image_sample_c_d_o v[0:1], v[0:7], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_d_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL-LABEL: sample_g16_noa16_d_3d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
+; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v2
-; GFX10GISEL-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
+; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v1, 16, v0
+; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v9
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v2, v4, v3, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1
+; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v4, 16, v3
; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2
-; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
-; GFX10GISEL-NEXT: v_perm_b32 v3, v8, v1, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
+; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
+; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v8, 16, v0
+; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v1
; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_cd_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v2, v4, v3, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1
+; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v4, 16, v3
; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2
-; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
-; GFX10GISEL-NEXT: v_perm_b32 v3, v8, v1, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
+; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
+; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v8, 16, v0
+; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v1
; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2
; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3
-; GFX10GISEL-NEXT: v_mov_b32_e32 v11, v4
; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1
-; GFX10GISEL-NEXT: v_perm_b32 v4, v10, v9, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v5, v5, v11, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v4
+; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v9
+; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v5, 16, v1
+; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v10, 16, v0
; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2
; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3
-; GFX10GISEL-NEXT: v_mov_b32_e32 v11, v4
; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1
-; GFX10GISEL-NEXT: v_perm_b32 v4, v10, v9, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v5, v5, v11, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v4
+; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v9
+; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v5, 16, v1
+; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v10, 16, v0
; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_2d_g16_a16:
; GFX10GISEL: ; %bb.0: ; %main_body
-; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
+; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
+; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v5, 16, v4
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v7
-; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
-; GFX10GISEL-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
-; GFX10GISEL-NEXT: v_perm_b32 v6, v10, v6, 0x5040100
+; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
+; GFX10GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v9
+; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v2
+; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v1, 16, v0
+; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v10, 16, v6
+; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v8
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
; GFX7GLISEL-NEXT: s_mov_b32 s2, -1
; GFX7GLISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX7GLISEL-NEXT: s_and_b32 s3, s3, 0x7fff
-; GFX7GLISEL-NEXT: s_bfe_u32 s3, s3, 0x100000
+; GFX7GLISEL-NEXT: s_and_b32 s3, 0xffff, s3
; GFX7GLISEL-NEXT: s_cmpk_gt_u32 s3, 0x7c00
; GFX7GLISEL-NEXT: s_cselect_b32 s3, 1, 0
; GFX7GLISEL-NEXT: s_bfe_i32 s3, s3, 0x10000
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00
; GFX7GLISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2
; GFX7GLISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0x400, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_ne_u32_e32 vcc, v0, v2
; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v0, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v0
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, v0
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v1
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v1
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v1
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0x7fff, v2
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v2, v2, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v1
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v1
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0x7fff, v2
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v2, v2, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7GLISEL-NEXT: v_and_b32_e32 v3, 0x7fff, v3
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v1
-; GFX7GLISEL-NEXT: v_bfe_u32 v3, v3, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s4, v2
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v0
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], s4, v1
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[4:5], 1, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, v2
; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v2, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v2
; GFX7GLISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], 1, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v1
; GFX7GLISEL: ; %bb.0:
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL-NEXT: s_setpc_b64 s[30:31]
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v1
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], s6, v1
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0x7fff, v0
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_ne_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_movk_i32 s6, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s6, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v1, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], s[4:5], vcc
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7GLISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e64 s[4:5], v0, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7e00
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: s_movk_i32 s8, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], s8, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x3ff
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v1, v2
-; GFX7GLISEL-NEXT: v_bfe_u32 v1, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v2
; GFX7GLISEL-NEXT: v_mov_b32_e32 v2, 0x7e00
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], s4, v0
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: s_movk_i32 s4, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0
; GFX7GLISEL-NEXT: v_cmp_lt_u32_e64 s[4:5], s4, v0
; GFX7GLISEL: ; %bb.0: ; %entry
; GFX7GLISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX7GLISEL-NEXT: v_bfe_u32 v0, v0, 0, 16
+; GFX7GLISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7GLISEL-NEXT: v_mov_b32_e32 v1, 0x7c00
; GFX7GLISEL-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1
; GFX7GLISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
-; SDAG-VI: ; %bb.0:
-; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
-; GISEL-VI: ; %bb.0:
-; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
-; SDAG-VI: ; %bb.0:
-; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; SDAG-VI-NEXT: v_or_b32_e32 v0, 0x3c00, v0
-; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, 0x3c00, v0
+; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: v_mov_b32_e32 v0, 1.0
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
-; GISEL-VI: ; %bb.0:
-; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GISEL-VI-NEXT: s_movk_i32 s4, 0x3c00
-; GISEL-VI-NEXT: s_bfe_u32 s4, s4, 0x100000
-; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_e32 v0, s4, v0
-; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
-; SDAG-VI: ; %bb.0:
-; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; SDAG-VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: v_mov_b32_e32 v0, v3
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
-; GISEL-VI: ; %bb.0:
-; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
-; SDAG-VI: ; %bb.0:
-; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
; SDAG-CI: ; %bb.0:
; GISEL-GFX9: ; %bb.0:
; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 16
-; GISEL-GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
-; GISEL-VI: ; %bb.0:
-; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1
; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; GISEL-CI-NEXT: v_bfe_u32 v0, v0, 0, 16
; GISEL-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.ext = fpext half %src0 to float
; GFX9-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
-; SDAG-VI: ; %bb.0:
-; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SDAG-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
-; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
-; GISEL-VI: ; %bb.0:
-; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GISEL-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
-; SDAG-VI: ; %bb.0:
-; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
-; GISEL-VI: ; %bb.0:
-; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v2 clamp
-; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
-; SDAG-VI: ; %bb.0:
-; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; SDAG-VI-NEXT: flat_store_short v[0:1], v0
-; SDAG-VI-NEXT: s_waitcnt vmcnt(0)
-; SDAG-VI-NEXT: v_max_f16_sdwa v0, v0, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cvt_f32_f16_e32 v0, v0
+; VI-NEXT: v_cvt_f32_f16_e32 v1, v1
+; VI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; VI-NEXT: v_mac_f32_e32 v2, v0, v1
+; VI-NEXT: v_cvt_f16_f32_e32 v0, v2
+; VI-NEXT: flat_store_short v[0:1], v0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_max_f16_sdwa v0, v0, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
; SDAG-CI: ; %bb.0:
; SDAG-CI-NEXT: s_waitcnt vmcnt(0)
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
-; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
-; GISEL-VI: ; %bb.0:
-; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2
-; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; GISEL-VI-NEXT: flat_store_short v[0:1], v0
-; GISEL-VI-NEXT: s_waitcnt vmcnt(0)
-; GISEL-VI-NEXT: v_max_f16_e64 v0, v0, v0 clamp
-; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
-;
; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use:
; GISEL-CI: ; %bb.0:
; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
attributes #1 = { nounwind readnone speculatable }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CI: {{.*}}
-; VI: {{.*}}
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4
; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5
-; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v5
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v2f32:
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
; GISEL-VI-NEXT: v_mac_f32_e32 v8, v6, v7
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v4
+; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v8
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v8
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v5
-; GISEL-VI-NEXT: v_mov_b32_e32 v3, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GISEL-VI-NEXT: v_bfe_u32 v1, v2, 0, 16
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5
+; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v2
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v3f32:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2
-; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4
-; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v9, v3
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
; GISEL-VI-NEXT: v_mac_f32_e32 v10, v6, v8
+; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
; GISEL-VI-NEXT: v_mac_f32_e32 v11, v7, v9
; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v4
; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v10
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v3, v5
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v11
-; GISEL-VI-NEXT: v_mov_b32_e32 v4, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-VI-NEXT: v_or_b32_e32 v1, v2, v3
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v4f32:
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4
; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v2 clamp
-; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp
-; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v5 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt:
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
; GISEL-VI-NEXT: v_mac_f32_e32 v8, v6, v7
-; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v4 clamp
+; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v8 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v2, v4 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3
-; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v8 clamp
-; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v2, v5 clamp
-; GISEL-VI-NEXT: v_mov_b32_e32 v3, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GISEL-VI-NEXT: v_bfe_u32 v1, v2, 0, 16
+; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp
+; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v2
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v3f32_clamp_postcvt:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2
-; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4
-; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v9, v3
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4
+; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
; GISEL-VI-NEXT: v_mac_f32_e32 v10, v6, v8
+; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2
; GISEL-VI-NEXT: v_mac_f32_e32 v11, v7, v9
; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3
-; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v4 clamp
; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v10 clamp
-; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v3, v5 clamp
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v4 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v2, v11 clamp
-; GISEL-VI-NEXT: v_mov_b32_e32 v4, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v3, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-VI-NEXT: v_or_b32_e32 v1, v2, v3
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v4f32_clamp_postcvt:
; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GISEL-GFX900-NEXT: v_max_f16_e64 v4, v3, v3 clamp
; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GISEL-GFX900-NEXT: v_bfe_u32 v0, v4, 0, 16
-; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, 0xffff0000
-; GISEL-GFX900-NEXT: v_and_or_b32 v0, v3, v1, v0
+; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, 0xffff0000
+; GISEL-GFX900-NEXT: v_and_or_b32 v0, v3, v0, v4
; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GISEL-GFX906-NEXT: v_max_f16_e64 v4, v3, v3 clamp
; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GISEL-GFX906-NEXT: v_bfe_u32 v0, v4, 0, 16
-; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, 0xffff0000
-; GISEL-GFX906-NEXT: v_and_or_b32 v0, v3, v1, v0
+; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, 0xffff0000
+; GISEL-GFX906-NEXT: v_and_or_b32 v0, v3, v0, v4
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4
; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5
-; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v5
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-VI-NEXT: v_max_f16_e64 v1, v0, v0 clamp
; GISEL-VI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo:
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3
-; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2
-; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
-; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16
-; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v5
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v4
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0
-; GISEL-CI-NEXT: v_bfe_u32 v0, v0, 0, 16
-; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GISEL-CI-NEXT: v_or_b32_e32 v0, v1, v0
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v0
; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2
; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16
; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, 16
-; GISEL-GFX900-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v4
+; GISEL-GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, 0xffff
; GISEL-GFX900-NEXT: v_and_or_b32 v0, v3, v1, v0
; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31]
; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
-; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, 16
-; GISEL-GFX906-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v4
+; GISEL-GFX906-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, 0xffff
; GISEL-GFX906-NEXT: v_and_or_b32 v0, v3, v1, v0
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4
; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5
-; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GISEL-VI-NEXT: v_max_f16_sdwa v1, v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
+; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v5
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-VI-NEXT: v_max_f16_sdwa v1, v0, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4
; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3
-; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5
; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2
-; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4
-; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16
-; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v5
+; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v4
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0
-; GISEL-CI-NEXT: v_bfe_u32 v0, v0, 0, 16
-; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GISEL-CI-NEXT: v_or_b32_e32 v0, v1, v0
; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2
; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16
; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GISEL-VI-NEXT: v_mad_f32 v3, v3, v4, v5 clamp
; GISEL-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v3
-; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT: v_or_b32_e32 v0, v1, v0
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_precvt:
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3
; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5
-; GISEL-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
; GISEL-VI-NEXT: v_mad_f32 v6, v6, v7, v8 clamp
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GISEL-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
+; GISEL-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v6
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v1
-; GISEL-VI-NEXT: v_mov_b32_e32 v3, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GISEL-VI-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GISEL-VI-NEXT: v_or_b32_e32 v0, v2, v0
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v3f32_clamp_precvt:
; GISEL-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp
; GISEL-VI-NEXT: v_mad_f32 v2, v7, v9, v11 clamp
; GISEL-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v1
; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v3, v6
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GISEL-VI-NEXT: v_mov_b32_e32 v4, 16
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GISEL-VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GISEL-VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GISEL-VI-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
+; GISEL-VI-NEXT: v_or_b32_e32 v0, v3, v0
+; GISEL-VI-NEXT: v_or_b32_e32 v1, v2, v1
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-CI-LABEL: v_mad_mix_v4f32_clamp_precvt:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f16_e32 v1, v0
-; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f16:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT: v_rndne_f16_e32 v1, v0
-; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v2, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f16_fneg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_rndne_f16_e32 v2, v0
-; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT: v_rndne_f16_e32 v3, v1
-; GFX8-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX8-NEXT: v_mov_b32_e32 v4, 16
-; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v4f16:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mul_f16_e32 v2, v0, v1
-; GFX8-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_v2f16_fpexcept_strict:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mul_f16_e32 v2, v0, v1
-; GFX8-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_v2f16_fpexcept_ignore:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mul_f16_e32 v2, v0, v1
-; GFX8-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mul_f16_e32 v4, v0, v2
-; GFX8-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT: v_mul_f16_e32 v1, v1, v3
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-GISEL-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_mul_f16_e32 v4, v0, v2
-; GFX8-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT: v_mul_f16_e32 v2, v1, v3
-; GFX8-GISEL-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
;
; GFX8-GISEL-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX8-GISEL: ; %bb.0:
-; GFX8-GISEL-NEXT: s_lshr_b32 s1, s3, 16
; GFX8-GISEL-NEXT: s_lshr_b32 s0, s2, 16
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-GISEL-NEXT: s_lshr_b32 s1, s3, 16
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s3
-; GFX8-GISEL-NEXT: v_mul_f16_e32 v1, s0, v1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8-GISEL-NEXT: v_mul_f16_e32 v0, s2, v0
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_mul_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-GISEL-NEXT: v_add_f16_e32 v2, v0, v1
-; GFX8-GISEL-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-GISEL-NEXT: v_add_f16_e32 v2, v0, v1
-; GFX8-GISEL-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-GISEL-NEXT: v_add_f16_e32 v2, v0, v1
-; GFX8-GISEL-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 16
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_sub_f16_e32 v4, v0, v2
; GFX9-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-GISEL-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-GISEL-NEXT: v_sub_f16_e32 v1, v1, v3
-; GFX9-GISEL-NEXT: v_perm_b32 v0, v0, v4, s4
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v4
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_sub_f16_e32 v4, v0, v2
-; GFX8-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT: v_sub_f16_e32 v1, v1, v3
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-GISEL-NEXT: v_bfe_u32 v1, v1, 0, 16
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX10-GISEL-NEXT: v_sub_f16_e32 v4, v0, v2
; GFX10-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-GISEL-NEXT: v_sub_f16_e32 v1, v1, v3
-; GFX10-GISEL-NEXT: v_perm_b32 v0, v0, v4, 0x5040100
+; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v4
+; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
; GFX10PLUS-GISEL-NEXT: v_sub_f16_e32 v0, v0, v2
; GFX10PLUS-GISEL-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX10PLUS-GISEL-NEXT: v_sub_f16_e32 v2, v4, v5
-; GFX10PLUS-GISEL-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
+; GFX10PLUS-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10PLUS-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX10PLUS-GISEL-NEXT: s_setpc_b64 s[30:31]
%val = call <3 x half> @llvm.experimental.constrained.fsub.v3f16(<3 x half> %x, <3 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <3 x half> %val
; GFX9-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-GISEL-NEXT: v_sub_f16_e32 v2, v1, v3
; GFX9-GISEL-NEXT: v_sub_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX9-GISEL-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-GISEL-NEXT: v_perm_b32 v0, v0, v4, s4
-; GFX9-GISEL-NEXT: v_perm_b32 v1, v1, v2, s4
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v4
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v1, 16, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_sub_f16_e32 v4, v0, v2
-; GFX8-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT: v_sub_f16_e32 v2, v1, v3
-; GFX8-GISEL-NEXT: v_sub_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 16
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_sub_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-GISEL-NEXT: v_sub_f16_e32 v4, v0, v2
+; GFX10-GISEL-NEXT: v_sub_f16_e32 v5, v1, v3
; GFX10-GISEL-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-GISEL-NEXT: v_sub_f16_e32 v2, v1, v3
; GFX10-GISEL-NEXT: v_sub_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; GFX10-GISEL-NEXT: v_perm_b32 v0, v0, v4, 0x5040100
-; GFX10-GISEL-NEXT: v_perm_b32 v1, v1, v2, 0x5040100
+; GFX10-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v4
+; GFX10-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5
+; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
+; GFX10-GISEL-NEXT: v_lshl_or_b32 v1, v1, 16, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
; GFX10PLUS-GISEL-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX10PLUS-GISEL-NEXT: v_sub_f16_e32 v2, v4, v6
; GFX10PLUS-GISEL-NEXT: v_sub_f16_e32 v3, v5, v7
-; GFX10PLUS-GISEL-NEXT: v_perm_b32 v0, v2, v0, 0x5040100
-; GFX10PLUS-GISEL-NEXT: v_perm_b32 v1, v3, v1, 0x5040100
+; GFX10PLUS-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX10PLUS-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX10PLUS-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX10PLUS-GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX10PLUS-GISEL-NEXT: s_setpc_b64 s[30:31]
%val = call <4 x half> @llvm.experimental.constrained.fsub.v4f16(<4 x half> %x, <4 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret <4 x half> %val
; GFX8-GISEL-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_xor_b32 s0, s3, 0x80008000
-; GFX8-GISEL-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-GISEL-NEXT: s_lshr_b32 s1, s2, 16
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-GISEL-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8-GISEL-NEXT: v_add_f16_e32 v1, s1, v1
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, 16
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s1
; GFX8-GISEL-NEXT: v_add_f16_e32 v0, s2, v0
-; GFX8-GISEL-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-SDAG-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-VI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,SDAG-GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1101 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,SDAG-GFX11 %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL-VI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GFX9,GISEL-GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL-GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1101 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GFX11,GISEL-GFX11 %s
; <GFX9 has no V_SAT_PK, GFX9+ has V_SAT_PK, GFX11 has V_SAT_PK with t16
; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1
; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: basic_smax_smin:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v2, 0xff
-; GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
-; GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: basic_smax_smin:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
-; GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX9-LABEL: basic_smax_smin:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, 0xff
+; SDAG-GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
+; SDAG-GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
+; SDAG-GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; SDAG-GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX11-LABEL: basic_smax_smin:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
+; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: basic_smax_smin:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: v_min_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX9-LABEL: basic_smax_smin:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0xff
+; GISEL-GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
+; GISEL-GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
+; GISEL-GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-LABEL: basic_smax_smin:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
+; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
%src0.max = call i16 @llvm.smax.i16(i16 %src0, i16 0)
%src0.clamp = call i16 @llvm.smin.i16(i16 %src0.max, i16 255)
%src1.max = call i16 @llvm.smax.i16(i16 %src1, i16 0)
; GISEL-VI-NEXT: s_sext_i32_i16 s2, s2
; GISEL-VI-NEXT: s_min_i32 s3, s3, s5
; GISEL-VI-NEXT: s_min_i32 s2, s2, s5
-; GISEL-VI-NEXT: s_bfe_u32 s3, s3, 0x100000
-; GISEL-VI-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GISEL-VI-NEXT: s_and_b32 s3, 0xffff, s3
+; GISEL-VI-NEXT: s_and_b32 s2, 0xffff, s2
; GISEL-VI-NEXT: s_lshl_b32 s3, s3, 16
; GISEL-VI-NEXT: s_or_b32 s2, s2, s3
; GISEL-VI-NEXT: v_mov_b32_e32 v0, s0
; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1
; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: basic_smin_smax:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v2, 0xff
-; GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
-; GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: basic_smin_smax:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
-; GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX9-LABEL: basic_smin_smax:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, 0xff
+; SDAG-GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
+; SDAG-GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
+; SDAG-GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; SDAG-GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX11-LABEL: basic_smin_smax:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
+; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: basic_smin_smax:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: v_max_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX9-LABEL: basic_smin_smax:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0xff
+; GISEL-GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
+; GISEL-GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
+; GISEL-GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-LABEL: basic_smin_smax:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
+; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
%src0.min = call i16 @llvm.smin.i16(i16 %src0, i16 255)
%src0.clamp = call i16 @llvm.smax.i16(i16 %src0.min, i16 0)
%src1.min = call i16 @llvm.smin.i16(i16 %src1, i16 255)
; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1
; SDAG-VI-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: basic_smin_smax_combined:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v2, 0xff
-; GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
-; GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
-; GFX9-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: basic_smin_smax_combined:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
-; GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX9-LABEL: basic_smin_smax_combined:
+; SDAG-GFX9: ; %bb.0:
+; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9-NEXT: v_mov_b32_e32 v2, 0xff
+; SDAG-GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
+; SDAG-GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
+; SDAG-GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; SDAG-GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; SDAG-GFX11-LABEL: basic_smin_smax_combined:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
+; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: basic_smin_smax_combined:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: v_min_i16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GISEL-VI-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX9-LABEL: basic_smin_smax_combined:
+; GISEL-GFX9: ; %bb.0:
+; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9-NEXT: v_mov_b32_e32 v2, 0xff
+; GISEL-GFX9-NEXT: v_med3_i16 v0, v0, 0, v2
+; GISEL-GFX9-NEXT: v_med3_i16 v1, v1, 0, v2
+; GISEL-GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-LABEL: basic_smin_smax_combined:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
+; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
%src0.min = call i16 @llvm.smin.i16(i16 %src0, i16 255)
%src0.clamp = call i16 @llvm.smax.i16(i16 %src0.min, i16 0)
%src1.max = call i16 @llvm.smax.i16(i16 %src1, i16 0)
; GISEL-VI-NEXT: s_sext_i32_i16 s2, s2
; GISEL-VI-NEXT: s_min_i32 s3, s3, s4
; GISEL-VI-NEXT: s_min_i32 s2, s2, s4
-; GISEL-VI-NEXT: s_bfe_u32 s3, s3, 0x100000
-; GISEL-VI-NEXT: s_bfe_u32 s2, s2, 0x100000
+; GISEL-VI-NEXT: s_and_b32 s3, 0xffff, s3
+; GISEL-VI-NEXT: s_and_b32 s2, 0xffff, s2
; GISEL-VI-NEXT: s_lshl_b32 s3, s3, 16
; GISEL-VI-NEXT: s_or_b32 s2, s2, s3
; GISEL-VI-NEXT: v_mov_b32_e32 v0, s0
include "llvm/Target/Target.td"
include "GlobalISelEmitterCommon.td"
-// No rule will be added to the match table.
// CHECK: constexpr static int64_t MatchTable0[] = {
-// CHECK-NEXT: GIM_Reject
-// CHECK-NEXT: };
+// CHECK-NEXT: GIM_Try,
+// CHECK-NEXT: GIM_CheckOpcode{{.*}}TargetOpcode::G_ANYEXT,
+// CHECK-NEXT: GIM_CheckType{{.*}}/*Type*/GILLT_s32,
+// CHECK-NEXT: GIM_CheckType{{.*}}/*Type*/GILLT_s8,
+// CHECK-NEXT: GIM_CheckRegBankForClass{{.*}}/*RC*/MyTarget::GPR32RegClassID,
+// CHECK-NEXT: // (anyext:{{.*}}=>{{.*}}(SELECT_I4:
+// CHECK: GIR_Done,
+// CHECK-NEXT: // Label 0:
+// CHECK-NEXT: GIM_Reject,
+// CHECK-NEXT: };
// Select-style test instruction: produces a GPR32 result chosen between the
// $T and $F operands according to the i8 condition register $cond. Used as
// the destination instruction of the (anyext i1) pattern checked above; the
// empty pattern list means selection comes only from the explicit Pat.
def SELECT_I4 : I<(outs GPR32:$dst), (ins GPR8:$cond, GPR32:$T, GPR32:$F), []>;
// Load-immediate test instruction: materializes the i32 immediate $src into a
// GPR32. No ISel pattern attached; referenced only from explicit Pat defs.
def LI : I<(outs GPR32:$dst), (ins i32imm:$src), []>;
const TreePatternNode *Src, const TreePatternNode *Dst);
Expected<action_iterator> createAndImportSubInstructionRenderer(
action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst,
- unsigned TempReg);
+ const TreePatternNode *Src, unsigned TempReg);
Expected<action_iterator>
createInstructionRenderer(action_iterator InsertPt, RuleMatcher &M,
const TreePatternNode *Dst);
action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
const TreePatternNode *Src, const TreePatternNode *Dst);
- Expected<action_iterator>
- importExplicitUseRenderers(action_iterator InsertPt, RuleMatcher &M,
- BuildMIAction &DstMIBuilder,
- const llvm::TreePatternNode *Dst);
- Expected<action_iterator>
- importExplicitUseRenderer(action_iterator InsertPt, RuleMatcher &Rule,
- BuildMIAction &DstMIBuilder,
- const TreePatternNode *DstChild);
+ Expected<action_iterator> importExplicitUseRenderers(
+ action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
+ const llvm::TreePatternNode *Dst, const TreePatternNode *Src);
+ Expected<action_iterator> importExplicitUseRenderer(
+ action_iterator InsertPt, RuleMatcher &Rule, BuildMIAction &DstMIBuilder,
+ const TreePatternNode *DstChild, const TreePatternNode *Src);
Error importDefaultOperandRenderers(action_iterator InsertPt, RuleMatcher &M,
BuildMIAction &DstMIBuilder,
DagInit *DefaultOps) const;
Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
action_iterator InsertPt, RuleMatcher &Rule, BuildMIAction &DstMIBuilder,
- const TreePatternNode *DstChild) {
+ const TreePatternNode *DstChild, const TreePatternNode *Src) {
const auto &SubOperand = Rule.getComplexSubOperand(DstChild->getName());
if (SubOperand) {
DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
auto InsertPtOrError = createAndImportSubInstructionRenderer(
- ++InsertPt, Rule, DstChild, TempRegID);
+ ++InsertPt, Rule, DstChild, Src, TempRegID);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
return InsertPtOrError.get();
return failedImport(
"Dst pattern child def is an unsupported tablegen class");
}
+
+ // Handle the case where the MVT/register class is omitted in the dest pattern
+ // but MVT exists in the source pattern.
+ if (isa<UnsetInit>(DstChild->getLeafValue())) {
+ for (unsigned NumOp = 0; NumOp < Src->getNumChildren(); NumOp++)
+ if (Src->getChild(NumOp)->getName() == DstChild->getName()) {
+ DstMIBuilder.addRenderer<CopyRenderer>(Src->getChild(NumOp)->getName());
+ return InsertPt;
+ }
+ }
return failedImport("Dst pattern child is an unsupported kind");
}
.takeError())
return std::move(Error);
- if (auto Error = importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst)
- .takeError())
+ if (auto Error =
+ importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst, Src)
+ .takeError())
return std::move(Error);
return DstMIBuilder;
Expected<action_iterator>
GlobalISelEmitter::createAndImportSubInstructionRenderer(
const action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst,
- unsigned TempRegID) {
+ const TreePatternNode *Src, unsigned TempRegID) {
auto InsertPtOrError = createInstructionRenderer(InsertPt, M, Dst);
// TODO: Assert there's exactly one result.
// Assign the result to TempReg.
DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID, true);
- InsertPtOrError =
- importExplicitUseRenderers(InsertPtOrError.get(), M, DstMIBuilder, Dst);
+ InsertPtOrError = importExplicitUseRenderers(InsertPtOrError.get(), M,
+ DstMIBuilder, Dst, Src);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
- const llvm::TreePatternNode *Dst) {
+ const llvm::TreePatternNode *Dst, const llvm::TreePatternNode *Src) {
const CodeGenInstruction *DstI = DstMIBuilder.getCGI();
CodeGenInstruction *OrigDstI = &Target.getInstruction(Dst->getOperator());
InsertPt, *ExtractSrcTy, TempRegID);
auto InsertPtOrError = createAndImportSubInstructionRenderer(
- ++InsertPt, M, ValChild, TempRegID);
+ ++InsertPt, M, ValChild, Src, TempRegID);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef());
auto InsertPtOrError =
- importExplicitUseRenderer(InsertPt, M, DstMIBuilder, ValChild);
+ importExplicitUseRenderer(InsertPt, M, DstMIBuilder, ValChild, Src);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
InsertPt = InsertPtOrError.get();
}
auto InsertPtOrError = importExplicitUseRenderer(InsertPt, M, DstMIBuilder,
- Dst->getChild(Child));
+ Dst->getChild(Child), Src);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
InsertPt = InsertPtOrError.get();