From 14c46954c910efb1db94a068a866c7259deaa9d9 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 25 Mar 2016 13:57:15 -0700 Subject: [PATCH] i965: Add an implemnetation of nir_op_fquantize2f16 Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 28 ++++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 25 +++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 4de5599..7839428 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -993,6 +993,34 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; + case nir_op_fquantize2f16: { + fs_reg tmp16 = bld.vgrf(BRW_REGISTER_TYPE_D); + fs_reg tmp32 = bld.vgrf(BRW_REGISTER_TYPE_F); + fs_reg zero = bld.vgrf(BRW_REGISTER_TYPE_F); + + /* The destination stride must be at least as big as the source stride. */ + tmp16.type = BRW_REGISTER_TYPE_W; + tmp16.stride = 2; + + /* Check for denormal */ + fs_reg abs_src0 = op[0]; + abs_src0.abs = true; + bld.CMP(bld.null_reg_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)), + BRW_CONDITIONAL_L); + /* Get the appropriately signed zero */ + bld.AND(retype(zero, BRW_REGISTER_TYPE_UD), + retype(op[0], BRW_REGISTER_TYPE_UD), + brw_imm_ud(0x80000000)); + /* Do the actual F32 -> F16 -> F32 conversion */ + bld.emit(BRW_OPCODE_F32TO16, tmp16, op[0]); + bld.emit(BRW_OPCODE_F16TO32, tmp32, tmp16); + /* Select that or zero based on normal status */ + inst = bld.SEL(result, zero, tmp32); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->saturate = instr->dest.saturate; + break; + } + case nir_op_fmin: case nir_op_imin: case nir_op_umin: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index eef3940..ee6929b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1188,6 +1188,31 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) inst->saturate = instr->dest.saturate; break; + case nir_op_fquantize2f16: { + /* See also vec4_visitor::emit_pack_half_2x16() */ + src_reg tmp16 = src_reg(this, glsl_type::uvec4_type); + src_reg tmp32 = src_reg(this, glsl_type::vec4_type); + src_reg zero = src_reg(this, glsl_type::vec4_type); + + /* Check for denormal */ + src_reg abs_src0 = op[0]; + abs_src0.abs = true; + emit(CMP(dst_null_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)), + BRW_CONDITIONAL_L)); + /* Get the appropriately signed zero */ + emit(AND(retype(dst_reg(zero), BRW_REGISTER_TYPE_UD), + retype(op[0], BRW_REGISTER_TYPE_UD), + brw_imm_ud(0x80000000))); + /* Do the actual F32 -> F16 -> F32 conversion */ + emit(F32TO16(dst_reg(tmp16), op[0])); + emit(F16TO32(dst_reg(tmp32), tmp16)); + /* Select that or zero based on normal status */ + inst = emit(BRW_OPCODE_SEL, dst, zero, tmp32); + inst->predicate = BRW_PREDICATE_NORMAL; + inst->saturate = instr->dest.saturate; + break; + } + case nir_op_fmin: case nir_op_imin: case nir_op_umin: -- 2.7.4