From: Kenneth Graunke Date: Wed, 29 Jan 2014 21:45:27 +0000 (-0800) Subject: i965/fs: Implement FS_OPCODE_[UN]PACK_HALF_2x16_SPLIT[_XY] opcodes. X-Git-Tag: upstream/10.3~3540 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=808952a09543b60e59c5ad9238d8403fa9f1f15b;p=platform%2Fupstream%2Fmesa.git i965/fs: Implement FS_OPCODE_[UN]PACK_HALF_2x16_SPLIT[_XY] opcodes. I'd neglected to port these to Broadwell. Most of this code is copied and pasted from Gen7, but instead of using F32TO16/F16TO32, we just use MOV with HF register types. Fixes fs-packHalf2x16 and fs-unpackHalf2x16 tests (both the ARB extension and ES 3.0 variants). Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 5058c48..b1e38b6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -731,6 +731,13 @@ private: void generate_set_simd4x2_offset(fs_inst *ir, struct brw_reg dst, struct brw_reg offset); + void generate_pack_half_2x16_split(fs_inst *inst, + struct brw_reg dst, + struct brw_reg x, + struct brw_reg y); + void generate_unpack_half_2x16_split(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src); void generate_discard_jump(fs_inst *ir); void patch_discard_jumps_to_fb_writes(); diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp index 0e1214d..86e772c 100644 --- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp @@ -757,6 +757,78 @@ gen8_fs_generator::generate_set_sample_id(fs_inst *ir, default_state.exec_size = save_exec_size; } +/** + * Change the register's data type from UD to HF, doubling the strides in order + * to compensate for halving the data type width. 
+ */ +static struct brw_reg +ud_reg_to_hf(struct brw_reg r) +{ + assert(r.type == BRW_REGISTER_TYPE_UD); + r.type = BRW_REGISTER_TYPE_HF; + + /* The BRW_*_STRIDE enums are defined so that incrementing the field + * doubles the real stride. + */ + if (r.hstride != 0) + ++r.hstride; + if (r.vstride != 0) + ++r.vstride; + + return r; +} + +void +gen8_fs_generator::generate_pack_half_2x16_split(fs_inst *inst, + struct brw_reg dst, + struct brw_reg x, + struct brw_reg y) +{ + assert(dst.type == BRW_REGISTER_TYPE_UD); + assert(x.type == BRW_REGISTER_TYPE_F); + assert(y.type == BRW_REGISTER_TYPE_F); + + struct brw_reg dst_hf = ud_reg_to_hf(dst); + + /* Give each 32-bit channel of dst the form below, where "." means + * unchanged. + * 0x....hhhh + */ + MOV(dst_hf, y); + + /* Now the form: + * 0xhhhh0000 + */ + SHL(dst, dst, brw_imm_ud(16u)); + + /* And, finally the form of packHalf2x16's output: + * 0xhhhhllll + */ + MOV(dst_hf, x); +} + +void +gen8_fs_generator::generate_unpack_half_2x16_split(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src) +{ + assert(dst.type == BRW_REGISTER_TYPE_F); + assert(src.type == BRW_REGISTER_TYPE_UD); + + struct brw_reg src_hf = ud_reg_to_hf(src); + + /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll. + * For the Y case, we wish to access only the upper word; therefore + * a 16-bit subregister offset is needed. 
+ */ + assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X || + inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y); + if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y) + src_hf.subnr += 2; + + MOV(dst, src_hf); +} + void gen8_fs_generator::generate_code(exec_list *instructions) { @@ -1140,12 +1212,12 @@ gen8_fs_generator::generate_code(exec_list *instructions) break; case FS_OPCODE_PACK_HALF_2x16_SPLIT: - assert(!"XXX: Missing Gen8 scalar support for PACK_HALF_2x16_SPLIT"); + generate_pack_half_2x16_split(ir, dst, src[0], src[1]); break; case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: - assert(!"XXX: Missing Gen8 scalar support for UNPACK_HALF_2x16_SPLIT"); + generate_unpack_half_2x16_split(ir, dst, src[0]); break; case FS_OPCODE_PLACEHOLDER_HALT: