//~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_sel:uword0 dst_preserve src0_sel:uword0
//~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_sel:ubyte2 dst_preserve src0_sel:ubyte2
//~gfx11! v2b: %0:v[1][0:16] = v_add_u16_e64 %0:v[0][0:16], 0
- //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[0], %0:v[1], 0x7020504
+ //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7020504
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1), Definition(v1_b3, v1b),
//~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
//~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1
//~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
- //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[0], %0:v[1], 0x7060104
+ //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7060104
//~gfx11! v2b: %0:v[1][16:32] = v_add_u16_e64 hi(%0:v[0][16:32]), 0 opsel_hi
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
bld.pseudo(aco_opcode::p_parallelcopy,
//~gfx(9|11)! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
//~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1
//~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
- //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[1], %0:v[0], 0x7060104
- //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[1], %0:v[0], 0x3060504
+ //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060104
+ //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x3060504
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1),
//~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
//~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
- //~gfx11! v1: %0:v[0] = v_perm_b32 0, %0:v[0], 0x5060704
+ //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
//~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
//~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
//~gfx(9|11)! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
//~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
- //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[0], %0:v[1], 0x3060504
+ //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x3060504
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b),
//~gfx11! v2b: %0:v[0][16:32] = v_add_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16] opsel_hi
//~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16]
//~gfx11! v2b: %0:v[0][16:32] = v_sub_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16] opsel_hi
- //~gfx11! v1: %0:v[0] = v_perm_b32 0, %0:v[0], 0x5060704
+ //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
//~gfx11! v2b: %0:v[0][16:32] = v_add_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16] opsel_hi
//~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16]
//~gfx11! v2b: %0:v[0][16:32] = v_sub_u16_e64 hi(%0:v[0][16:32]), %0:v[1][0:16] opsel_hi
//~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
//~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
- //~gfx11! v1: %0:v[0] = v_perm_b32 0, %0:v[0], 0x7040506
+ //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x7040506
//~gfx11! v2b: %0:v[0][0:16] = v_add_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
//~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
//~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
//~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
//~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
//~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
- //~gfx11! v1: %0:v[0] = v_perm_b32 0, %0:v[0], 0x5060704
+ //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_b1, v1b), Definition(v0_b3, v1b),
//! p_unit_test 9
//~gfx(9|10)! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_sel:ubyte1 dst_preserve src0_sel:dword
//~gfx(9|10)! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_sel:ubyte2 dst_preserve src0_sel:dword
- //~gfx11! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0x7060c04
+ //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c04
//~gfx11! v1: %_:v[0] = v_and_b32 0xff00ffff, %_:v[0]
//~gfx11! v1: %_:v[0] = v_or_b32 0x380000, %_:v[0]
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
//! p_unit_test 12
//! v1: %_:v[0] = v_mov_b32 0
//~gfx(9|10)! v1b: %_:v[1][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
- //~gfx11! v1: %_:v[1] = v_perm_b32 0, %_:v[1], 0x706050c
+ //~gfx11! v1: %_:v[1] = v_perm_b32 %_:v[1], 0, 0x706050c
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
Operand::zero(), Operand::zero(1));
//>> p_unit_test 13
//~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
//~gfx9! v1: %_:v[0] = v_or_b32 0xff, %_:v[0]
- //~gfx(10|11)! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0x7060c0d
+ //~gfx(10|11)! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c0d
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b),
Operand::c16(0x00ff));
//! p_unit_test 14
//~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
//~gfx9! v1: %_:v[0] = v_or_b32 0xff000000, %_:v[0]
- //~gfx(10|11)! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0xd0c0504
+ //~gfx(10|11)! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0xd0c0504
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b),
Operand::c16(0xff00));
//! p_unit_test 15
//~gfx(9|10)! v2b: %_:v[0][0:16] = v_mov_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
- //~gfx11! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0x7060c0c
+ //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c0c
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b),
Operand::zero(2));
//! p_unit_test 16
//~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 -1 dst_sel:ubyte0 dst_preserve src0_sel:dword
- //~gfx11! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0x706050d
+ //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050d
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b),
Operand::c8(0xff));
//! p_unit_test 17
//~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
- //~gfx11! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0x706050c
+ //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050c
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u));
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b),
Operand::zero(1));
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
- //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[1], %_:v[0], 0x7060c00
- //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[1], %_:v[0], 0x7060000
- //~gfx11_signed! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0x7060a04
+ //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00
+ //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000
+ //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
EXT(0, 0)
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
- //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[1], %_:v[0], 0x7060c02
- //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[1], %_:v[0], 0x7060202
- //~gfx11_signed! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0x7060a04
+ //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02
+ //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202
+ //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
if (lvl != GFX7)
EXT(0, 2)
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
- //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[1], %_:v[0], 0x7060c01
- //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[1], %_:v[0], 0x7060801
+ //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01
+ //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801
EXT(1, 0)
//~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
- //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[1], %_:v[0], 0x7060c03
- //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[1], %_:v[0], 0x7060903
+ //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03
+ //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903
if (lvl != GFX7)
EXT(1, 2)
//~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
//~gfx7! v1: %0:v[0] = v_lshlrev_b32 8, %0:v[0]
//~gfx(8|9)! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte1 src0_sel:dword
- //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[1], %0:v[0], 0xc0c000c
+ //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0c000c
INS(1, 8)
//~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
//~gfx7! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[0]
//~gfx(8|9)! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte2 src0_sel:dword
- //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[1], %0:v[0], 0xc000c0c
+ //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000c0c
INS(2, 8)
//! v1: %0:v[0] = v_lshlrev_b32 24, %0:v[1]
INS(3, 8)
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
//~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8
//~gfx(8|9)! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
- //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[1], %0:v[0], 0x7060c00
+ //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c00
INS(0, 0)
//~gfx(8|9)! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
- //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[1], %0:v[0], 0xc000504
+ //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000504
if (lvl != GFX7)
INS(0, 2)
//~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16]
//~gfx(8|9)! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0
- //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[1], %0:v[0], 0x706000c
+ //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706000c
INS(1, 0)
//~gfx(8|9)! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0
- //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[1], %0:v[0], 0xc0504
+ //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc0504
if (lvl != GFX7)
INS(1, 2)