From 4588f4a6048af2ae1b3a2eb33fd23227c1edf593 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga
Date: Thu, 26 Apr 2018 10:26:22 +0200
Subject: [PATCH] intel/compiler: handle extended math restrictions for half-float

Extended math with half-float operands is only supported since gen9,
but it is limited to SIMD8. In gen8 we lower it to 32-bit.

v2: squashed together the following patches (Jason):
  - intel/compiler: allow extended math functions with HF operands
  - intel/compiler: lower 16-bit extended math to 32-bit prior to gen9
  - intel/compiler: extended Math is limited to SIMD8 on half-float

Reviewed-by: Jason Ekstrand
Reviewed-by: Topi Pohjolainen (allow extended math functions with HF
operands, extended Math is limited to SIMD8 on half-float)
---
 src/intel/compiler/brw_eu_emit.c |  6 ++++--
 src/intel/compiler/brw_fs.cpp    | 27 ++++++++++++++++++---------
 src/intel/compiler/brw_nir.c     | 13 ++++++++++++-
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 94e247e..d589196 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -1916,8 +1916,10 @@ void gen6_math(struct brw_codegen *p,
       assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
              (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
    } else {
-      assert(src0.type == BRW_REGISTER_TYPE_F);
-      assert(src1.type == BRW_REGISTER_TYPE_F);
+      assert(src0.type == BRW_REGISTER_TYPE_F ||
+             (src0.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
+      assert(src1.type == BRW_REGISTER_TYPE_F ||
+             (src1.type == BRW_REGISTER_TYPE_HF && devinfo->gen >= 9));
    }
 
    /* Source modifiers are ignored for extended math instructions on Gen6. */
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index c04580a..15f51b0 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5936,18 +5936,27 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
    case SHADER_OPCODE_EXP2:
    case SHADER_OPCODE_LOG2:
    case SHADER_OPCODE_SIN:
-   case SHADER_OPCODE_COS:
+   case SHADER_OPCODE_COS: {
       /* Unary extended math instructions are limited to SIMD8 on Gen4 and
-       * Gen6.
+       * Gen6. Extended Math Function is limited to SIMD8 with half-float.
        */
-      return (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
-              devinfo->gen == 5 || devinfo->is_g4x ? MIN2(16, inst->exec_size) :
-              MIN2(8, inst->exec_size));
+      if (devinfo->gen == 6 || (devinfo->gen == 4 && !devinfo->is_g4x))
+         return MIN2(8, inst->exec_size);
+      if (inst->dst.type == BRW_REGISTER_TYPE_HF)
+         return MIN2(8, inst->exec_size);
+      return MIN2(16, inst->exec_size);
+   }
 
-   case SHADER_OPCODE_POW:
-      /* SIMD16 is only allowed on Gen7+. */
-      return (devinfo->gen >= 7 ? MIN2(16, inst->exec_size) :
-              MIN2(8, inst->exec_size));
+   case SHADER_OPCODE_POW: {
+      /* SIMD16 is only allowed on Gen7+. Extended Math Function is limited
+       * to SIMD8 with half-float
+       */
+      if (devinfo->gen < 7)
+         return MIN2(8, inst->exec_size);
+      if (inst->dst.type == BRW_REGISTER_TYPE_HF)
+         return MIN2(8, inst->exec_size);
+      return MIN2(16, inst->exec_size);
+   }
 
    case SHADER_OPCODE_INT_QUOTIENT:
    case SHADER_OPCODE_INT_REMAINDER:
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index c45e7d5..ab55739 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -631,6 +631,8 @@ lower_bit_size_callback(const nir_alu_instr *alu, UNUSED void *data)
    if (alu->dest.dest.ssa.bit_size != 16)
       return 0;
 
+   const struct brw_compiler *compiler = (const struct brw_compiler *) data;
+
    switch (alu->op) {
    case nir_op_idiv:
    case nir_op_imod:
@@ -643,6 +645,15 @@ lower_bit_size_callback(const nir_alu_instr *alu, UNUSED void *data)
    case nir_op_fround_even:
    case nir_op_ftrunc:
       return 32;
+   case nir_op_frcp:
+   case nir_op_frsq:
+   case nir_op_fsqrt:
+   case nir_op_fpow:
+   case nir_op_fexp2:
+   case nir_op_flog2:
+   case nir_op_fsin:
+   case nir_op_fcos:
+      return compiler->devinfo->gen < 9 ? 32 : 0;
    default:
       return 0;
    }
@@ -719,7 +730,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
       OPT(nir_opt_large_constants, NULL, 32);
    }
 
-   OPT(nir_lower_bit_size, lower_bit_size_callback, NULL);
+   OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler);
 
    if (is_scalar) {
       OPT(nir_lower_load_const_to_scalar);
-- 
2.7.4
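
For readers who want to see the combined policy outside of the Mesa tree, here
is a minimal standalone C sketch. It is not Mesa code: the names hw_info,
math_bit_size and math_simd_width are made up for illustration, and it only
restates the two rules this patch adds (16-bit extended math is executed in
32-bit before gen9; on gen9+ the half-float variants run natively but are
limited to SIMD8).

/* Standalone illustration only -- not Mesa code. hw_info, math_bit_size
 * and math_simd_width are hypothetical names used for this sketch. */
#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

struct hw_info {
   int gen;
   int is_g4x;
};

/* Restates the rule added to lower_bit_size_callback(): before gen9,
 * 16-bit extended math (frcp, frsq, fsqrt, fpow, fexp2, flog2, fsin,
 * fcos) is executed in 32-bit; from gen9 on it stays at 16-bit. */
static unsigned
math_bit_size(const struct hw_info *hw, unsigned src_bit_size)
{
   if (src_bit_size == 16 && hw->gen < 9)
      return 32;
   return src_bit_size;
}

/* Restates the get_lowered_simd_width() change for unary extended math:
 * SIMD8 on Gen4 (non-G4x) and Gen6, SIMD8 for half-float destinations,
 * SIMD16 otherwise. */
static unsigned
math_simd_width(const struct hw_info *hw, int dst_is_hf, unsigned exec_size)
{
   if (hw->gen == 6 || (hw->gen == 4 && !hw->is_g4x))
      return MIN2(8, exec_size);
   if (dst_is_hf)
      return MIN2(8, exec_size);
   return MIN2(16, exec_size);
}

int main(void)
{
   struct hw_info gen8 = { .gen = 8, .is_g4x = 0 };
   struct hw_info gen9 = { .gen = 9, .is_g4x = 0 };

   /* gen8: the 16-bit op is lowered to 32-bit, so SIMD16 still applies. */
   printf("gen8: bit_size=%u simd=%u\n",
          math_bit_size(&gen8, 16), math_simd_width(&gen8, 0, 16));

   /* gen9: stays 16-bit, but the HF math instruction is limited to SIMD8. */
   printf("gen9: bit_size=%u simd=%u\n",
          math_bit_size(&gen9, 16), math_simd_width(&gen9, 1, 16));

   return 0;
}

The two printfs are expected to show "bit_size=32 simd=16" for gen8 and
"bit_size=16 simd=8" for gen9, which is the behaviour the patch aims for.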