From: Kenneth Graunke Date: Wed, 5 Aug 2020 02:01:13 +0000 (-0700) Subject: intel/compiler: Do interpolateAtOffset coordinate scaling in NIR X-Git-Tag: upstream/21.0.0~2367 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=97ebb896afb6801d1fcd69556583975411379998;p=platform%2Fupstream%2Fmesa.git intel/compiler: Do interpolateAtOffset coordinate scaling in NIR In our source languages, interpolateAtOffset() takes a floating point offset in the range [-0.5, +0.5]. However, the hardware takes integer valued offsets in the range [-8, 7], in units of 1/16th of a pixel. So, we need to multiply and clamp the coordinates. We were doing this in the FS backend, but with the advent of IBC, I'd like to avoid doing it twice. This patch instead moves the lowering to NIR so we can reuse it across both backends. v2: Use nir_shader_instructions_pass (suggested by Eric Anholt). Reviewed-by: Caio Marcelo de Oliveira Filho Reviewed-by: Matt Turner Part-of: --- diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 56a0b29..018be45 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3629,8 +3629,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, if (const_offset) { assert(nir_src_bit_size(instr->src[0]) == 32); - unsigned off_x = MIN2((int)(const_offset[0].f32 * 16), 7) & 0xf; - unsigned off_y = MIN2((int)(const_offset[1].f32 * 16), 7) & 0xf; + unsigned off_x = const_offset[0].u32 & 0xf; + unsigned off_y = const_offset[1].u32 & 0xf; emit_pixel_interpolater_send(bld, FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, @@ -3639,35 +3639,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld, brw_imm_ud(off_x | (off_y << 4)), interpolation); } else { - fs_reg src = vgrf(glsl_type::ivec2_type); - fs_reg offset_src = retype(get_nir_src(instr->src[0]), - BRW_REGISTER_TYPE_F); - for (int i = 0; i < 2; i++) { - fs_reg temp = vgrf(glsl_type::float_type); - bld.MUL(temp, offset(offset_src, bld, i), 
brw_imm_f(16.0f)); - fs_reg itemp = vgrf(glsl_type::int_type); - /* float to int */ - bld.MOV(itemp, temp); - - /* Clamp the upper end of the range to +7/16. - * ARB_gpu_shader5 requires that we support a maximum offset - * of +0.5, which isn't representable in a S0.4 value -- if - * we didn't clamp it, we'd end up with -8/16, which is the - * opposite of what the shader author wanted. - * - * This is legal due to ARB_gpu_shader5's quantization - * rules: - * - * "Not all values of <offset> may be supported; x and y - * offsets may be rounded to fixed-point values with the - * number of fraction bits given by the - * implementation-dependent constant - * FRAGMENT_INTERPOLATION_OFFSET_BITS" - */ - set_condmod(BRW_CONDITIONAL_L, - bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7))); - } - + fs_reg src = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D); const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET; emit_pixel_interpolater_send(bld, opcode, diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index b83a880..4589ddd 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -364,6 +364,45 @@ brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map) } } +/** + * Convert interpolateAtOffset() offsets from [-0.5, +0.5] floating point + * offsets to integer [-8, +7] offsets (in units of 1/16th of a pixel). + * + * We clamp to +7/16 on the upper end of the range, since +0.5 isn't + * representable in a S0.4 value; a naive conversion would give us -8/16, + * which is the opposite of what was intended. + * + * This is allowed by GL_ARB_gpu_shader5's quantization rules: + * + * "Not all values of <offset> may be supported; x and y offsets may + * be rounded to fixed-point values with the number of fraction bits + * given by the implementation-dependent constant + * FRAGMENT_INTERPOLATION_OFFSET_BITS." 
+ */ +static bool +lower_barycentric_at_offset(nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + if (intrin->intrinsic != nir_intrinsic_load_barycentric_at_offset) + return false; + + b->cursor = nir_before_instr(instr); + + assert(intrin->src[0].ssa); + nir_ssa_def *offset = + nir_imin(b, nir_imm_int(b, 7), + nir_f2i32(b, nir_fmul(b, nir_imm_float(b, 16), + intrin->src[0].ssa))); + + nir_instr_rewrite_src(instr, &intrin->src[0], nir_src_for_ssa(offset)); + + return true; +} + void brw_nir_lower_fs_inputs(nir_shader *nir, const struct gen_device_info *devinfo, @@ -404,6 +443,11 @@ brw_nir_lower_fs_inputs(nir_shader *nir, if (devinfo->gen >= 11) nir_lower_interpolation(nir, ~0); + nir_shader_instructions_pass(nir, lower_barycentric_at_offset, + nir_metadata_block_index | + nir_metadata_dominance, + NULL); + /* This pass needs actual constants */ nir_opt_constant_folding(nir);