From abe5b06a99ef47895e86d5c5fed8545a17fd9720 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Fri, 23 Jun 2023 10:47:20 -0400
Subject: [PATCH] pan/bi: Use lower_frag_coord_to_pixel_coord

Instead of vendoring the logic. This has a side benefit of letting NIR
optimize the generated code a bit.

total instructions in shared programs: 2687284 -> 2687281 (<.01%)
instructions in affected programs: 532 -> 529 (-0.56%)
helped: 3
HURT: 1
Inconclusive result (value mean confidence interval includes 0).

total cycles in shared programs: 140711.33 -> 140711.31 (<.01%)
cycles in affected programs: 2.53 -> 2.52 (-0.62%)
helped: 1
HURT: 0

total fma in shared programs: 22059.44 -> 22059.39 (<.01%)
fma in affected programs: 2.69 -> 2.64 (-1.74%)
helped: 3
HURT: 0

total cvt in shared programs: 14659.09 -> 14659.09 (0.00%)
cvt in affected programs: 1.56 -> 1.56 (0.00%)
helped: 1
HURT: 1

total quadwords in shared programs: 1455408 -> 1455416 (<.01%)
quadwords in affected programs: 128 -> 136 (6.25%)
helped: 0
HURT: 1

Signed-off-by: Alyssa Rosenzweig
Reviewed-by: Boris Brezillon
Part-of:
---
 src/panfrost/compiler/bifrost_compile.c | 38 ++++++++++++---------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c
index 56e5b64..1e929f8 100644
--- a/src/panfrost/compiler/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost_compile.c
@@ -1456,30 +1456,14 @@ bi_emit_atomic_i32_to(bi_builder *b, bi_index dst, bi_index addr, bi_index arg,
    }
 }
 
-/* gl_FragCoord.xy = u16_to_f32(R59.xy) + 0.5
- * gl_FragCoord.z = ld_vary(fragz)
- * gl_FragCoord.w = ld_vary(fragw)
- */
-
 static void
-bi_emit_load_frag_coord(bi_builder *b, nir_intrinsic_instr *instr)
+bi_emit_load_frag_coord_zw(bi_builder *b, bi_index dst, unsigned channel)
 {
-   bi_index src[4] = {};
-
-   for (unsigned i = 0; i < 2; ++i) {
-      src[i] = bi_fadd_f32(b, bi_u16_to_f32(b, bi_half(bi_preload(b, 59), i)),
-                           bi_imm_f32(0.5f));
-   }
-
-   for (unsigned i = 0; i < 2; ++i) {
-      src[2 + i] = bi_ld_var_special(
-         b, bi_zero(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER,
-         BI_UPDATE_CLOBBER,
-         (i == 0) ? BI_VARYING_NAME_FRAG_Z : BI_VARYING_NAME_FRAG_W,
-         BI_VECSIZE_NONE);
-   }
-
-   bi_make_vec_to(b, bi_dest_index(&instr->dest), src, NULL, 4, 32);
+   bi_ld_var_special_to(
+      b, dst, bi_zero(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER,
+      BI_UPDATE_CLOBBER,
+      (channel == 2) ? BI_VARYING_NAME_FRAG_Z : BI_VARYING_NAME_FRAG_W,
+      BI_VECSIZE_NONE);
 }
 
 static void
@@ -1657,8 +1641,13 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
       bi_split_dest(b, instr->dest);
       break;
 
-   case nir_intrinsic_load_frag_coord:
-      bi_emit_load_frag_coord(b, instr);
+   case nir_intrinsic_load_pixel_coord:
+      /* Vectorized load of the preloaded i16vec2 */
+      bi_mov_i32_to(b, dst, bi_preload(b, 59));
+      break;
+
+   case nir_intrinsic_load_frag_coord_zw:
+      bi_emit_load_frag_coord_zw(b, dst, nir_intrinsic_component(instr));
       break;
 
    case nir_intrinsic_load_converted_output_pan:
@@ -4763,6 +4752,7 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
    NIR_PASS_V(nir, nir_lower_flrp, 16 | 32 | 64, false /* always_precise */);
    NIR_PASS_V(nir, nir_lower_var_copies);
    NIR_PASS_V(nir, nir_lower_alu);
+   NIR_PASS_V(nir, nir_lower_frag_coord_to_pixel_coord);
 }
 
 static bi_context *
-- 
2.7.4