Operand(v1_lo, v2b), Operand(v0_lo, v2b));
//~gfx[67]! p_unit_test 1
- //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
- //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[0][0:16], %0:v[1][16:32], 2
- //~gfx[67]! v1: %0:v[0] = v_mov_b32 %0:v[1]
+ //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
+ //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
+ //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
bld.pseudo(aco_opcode::p_create_vector,
Definition(v0_lo, v1),
Operand(v1_lo, v2b), Operand(v0_lo, v2b));
//~gfx[67]! p_unit_test 2
- //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
- //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[0][0:16], %0:v[1][16:32], 2
- //~gfx[67]! v1: %0:v[0] = v_mov_b32 %0:v[1]
+ //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
+ //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
+ //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_create_vector,
Operand(v0_lo, v2b), Operand(v2_lo, v2b));
//~gfx[67]! p_unit_test 3
- //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
- //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[0][0:16], %0:v[1][16:32], 2
- //~gfx[67]! v1: %0:v[0] = v_mov_b32 %0:v[1]
+ //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
+ //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
+ //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
//~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
//~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
continue;
//~gfx[89]>> p_unit_test 0
- //~gfx[89]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
+ //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
+ //~gfx9! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
Operand((uint16_t)0x4205), Operand(v1_hi, v2b));
- //TODO: optimize this with GFX10. do_pack_2x16() isn't used in this case
//! p_unit_test 2
- //~gfx9! v2b: %_:v[0][16:32] = v_mov_b32 %_:v[0][0:16] dst_preserve
- //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
+ //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
//~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
- //~gfx10! v2b: %_:v[0][16:32] = v_mov_b32 %_:v[0][0:16] dst_preserve
- //~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32])
+ //~gfx10! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16]
bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
bld.pseudo(aco_opcode::p_parallelcopy,
Definition(v0_lo, v2b), Definition(v0_hi, v2b),