ir3: Fix FS quad ops returning wrong values from helper invocations
author: Danylo Piliaiev <dpiliaiev@igalia.com>
Tue, 18 Jul 2023 12:41:03 +0000 (14:41 +0200)
committer: Marge Bot <emma+marge@anholt.net>
Wed, 19 Jul 2023 19:41:51 +0000 (19:41 +0000)
Without SP_FS_CTRL_REG0.LODPIXMASK set, quad ops do not read values from
helper invocations; they read from the current invocation instead.

Fixes:
 dEQP-VK.glsl.derivate.dfdxsubgroup.*
 dEQP-VK.glsl.derivate.dfdysubgroup.*

Cc: mesa-stable
Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24211>

src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_legalize.c
src/freedreno/ir3/ir3_shader.h
src/freedreno/vulkan/tu_pipeline.cc
src/gallium/drivers/freedreno/a6xx/fd6_program.cc

index 0890f05..5d0ee2b 100644 (file)
@@ -5009,8 +5009,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
    collect_tex_prefetches(ctx, ir);
 
    if (so->type == MESA_SHADER_FRAGMENT &&
-       ctx->s->info.fs.needs_quad_helper_invocations)
+       ctx->s->info.fs.needs_quad_helper_invocations) {
       so->need_pixlod = true;
+      so->need_full_quad = true;
+   }
 
    if ((ctx->so->type == MESA_SHADER_FRAGMENT) &&
        !ctx->s->info.fs.early_fragment_tests)
index 6939ce5..a344a00 100644 (file)
@@ -400,7 +400,7 @@ apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
          struct ir3_instruction *op_p = ir3_instr_clone(n);
          op_p->flags = IR3_INSTR_P;
 
-         ctx->so->need_fine_derivatives = true;
+         ctx->so->need_full_quad = true;
       }
    }
 
index 6c92173..f852e58 100644 (file)
@@ -688,7 +688,7 @@ struct ir3_shader_variant {
    /* do we need derivatives: */
    bool need_pixlod;
 
-   bool need_fine_derivatives;
+   bool need_full_quad;
 
    /* do we need VS driver params? */
    bool need_driver_params;
index 59d5ca5..3d9b272 100644 (file)
@@ -496,7 +496,7 @@ tu6_emit_xs(struct tu_cs *cs,
                .branchstack = ir3_shader_branchstack_hw(xs),
                .threadsize = thrsz,
                .varying = xs->total_in != 0,
-               .lodpixmask = xs->need_fine_derivatives,
+               .lodpixmask = xs->need_full_quad,
                /* unknown bit, seems unnecessary */
                .unk24 = true,
                .pixlodenable = xs->need_pixlod,
index cbb987c..f7d82c5 100644 (file)
@@ -933,7 +933,7 @@ setup_stateobj(struct fd_screen *screen, struct fd_ringbuffer *ring,
       ring,
       A6XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
          COND(enable_varyings, A6XX_SP_FS_CTRL_REG0_VARYING) | 0x1000000 |
-         COND(fs->need_fine_derivatives, A6XX_SP_FS_CTRL_REG0_LODPIXMASK) |
+         COND(fs->need_full_quad, A6XX_SP_FS_CTRL_REG0_LODPIXMASK) |
          A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) |
          A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fs->info.max_half_reg + 1) |
          COND(fs->mergedregs, A6XX_SP_FS_CTRL_REG0_MERGEDREGS) |