From 6c7a2b69f8df5a9c0cb8c8f3b099694bbe1b0b53 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 29 Oct 2019 10:12:28 +0100
Subject: [PATCH] v3d: handle writes to gl_Layer from geometry shaders
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

When geometry shaders write a value to gl_Layer that doesn't correspond to
an existing layer in the target framebuffer the rendering behavior is
undefined according to the spec, however, there are CTS tests that trigger
this scenario on purpose, probably to ensure that nothing terrible happens.

For V3D, this situation is problematic because the binner uses the layer
index to select the offset to write into the tile state data, and we only
allocate tile state for MAX2(num_layers, 1), so we want to make sure we
don't produce values that would lead to out of bounds writes. The simulator
has an assert to catch this, although we haven't observed issues in actual
hardware it is probably best to play safe.

Reviewed-by: Alejandro PiÃ±eiro <apinheiro@igalia.com>
---
 src/broadcom/compiler/nir_to_vir.c       |  5 ++++
 src/broadcom/compiler/v3d_compiler.h     |  8 +++++++
 src/broadcom/compiler/v3d_nir_lower_io.c | 40 ++++++++++++++++++++++++++++++++
 src/compiler/nir/nir_intrinsics.py       |  4 ++++
 src/gallium/drivers/v3d/v3d_uniforms.c   |  8 +++++++
 5 files changed, 65 insertions(+)

diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index e566325..e2de77d 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -2346,6 +2346,11 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                 ntq_store_dest(c, &instr->dest, 0, vir_IID(c));
                 break;
 
+        case nir_intrinsic_load_fb_layers_v3d:
+                ntq_store_dest(c, &instr->dest, 0,
+                               vir_uniform(c, QUNIFORM_FB_LAYERS, 0));
+                break;
+
         default:
                 fprintf(stderr, "Unknown intrinsic: ");
                 nir_print_instr(&instr->instr, stderr);
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 4249c18..0489ebd 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -279,6 +279,14 @@ enum quniform_contents {
          * L2T cache will effectively be the shared memory area.
          */
         QUNIFORM_SHARED_OFFSET,
+
+        /**
+         * Returns the number of layers in the framebuffer.
+         *
+         * This is used to cap gl_Layer in geometry shaders to avoid
+         * out-of-bounds accesses into the tile state during binning.
+         */
+        QUNIFORM_FB_LAYERS,
 };
 
 static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value)
diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c
index 9b7db65..855b9c4 100644
--- a/src/broadcom/compiler/v3d_nir_lower_io.c
+++ b/src/broadcom/compiler/v3d_nir_lower_io.c
@@ -193,6 +193,46 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                 v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
         }
 
+        if (var->data.location == VARYING_SLOT_LAYER) {
+                assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
+                nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
+                header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));
+
+                /* From the GLES 3.2 spec:
+                 *
+                 *    "When fragments are written to a layered framebuffer, the
+                 *     fragmentâs layer number selects an image from the array
+                 *     of images at each attachment (...). If the fragmentâs
+                 *     layer number is negative, or greater than or equal to
+                 *     the minimum number of layers of any attachment, the
+                 *     effects of the fragment on the framebuffer contents are
+                 *     undefined."
+                 *
+                 * This suggests we can just ignore that situation, however,
+                 * for V3D an out-of-bounds layer index means that the binner
+                 * might do out-of-bounds writes access to the tile state. The
+                 * simulator has an assert to catch this, so we play safe here
+                 * and we make sure that doesn't happen by setting gl_Layer
+                 * to 0 in that case (we always allocate tile state for at
+                 * least one layer).
+                 */
+                nir_intrinsic_instr *load =
+                        nir_intrinsic_instr_create(b->shader,
+                                                   nir_intrinsic_load_fb_layers_v3d);
+                load->num_components = 1;
+                nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
+                nir_builder_instr_insert(b, &load->instr);
+                nir_ssa_def *fb_layers = &load->dest.ssa;
+
+                nir_ssa_def *cond = nir_ige(b, src, fb_layers);
+                nir_ssa_def *layer_id =
+                        nir_bcsel(b, cond,
+                                  nir_imm_int(b, 0),
+                                  nir_ishl(b, src, nir_imm_int(b, 16)));
+                header = nir_ior(b, header, layer_id);
+                nir_store_var(b, state->gs.header_var, header, 0x1);
+        }
+
         /* Scalarize outputs if it hasn't happened already, since we want to
          * schedule each VPM write individually.  We can skip any outut
          * components not read by the FS.
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 3939f8f..c53babd 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -868,3 +868,7 @@ load("tlb_color_v3d", 1, [BASE, COMPONENT], [])
 # src[] = { value, render_target }
 # BASE = sample index
 store("tlb_sample_color_v3d", 2, [BASE, COMPONENT, TYPE], [])
+
+# V3D-specific intrinsic to load the number of layers attached to
+# the target framebuffer
+intrinsic("load_fb_layers_v3d", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c
index c94f6be..ab57880 100644
--- a/src/gallium/drivers/v3d/v3d_uniforms.c
+++ b/src/gallium/drivers/v3d/v3d_uniforms.c
@@ -373,6 +373,10 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job,
                                          v3d->compute_shared_memory, 0);
                         break;
 
+                case QUNIFORM_FB_LAYERS:
+                        cl_aligned_u32(&uniforms, job->num_layers);
+                        break;
+
                 default:
                         assert(quniform_contents_is_texture_p0(uinfo->contents[i]));
 
@@ -465,6 +469,10 @@ v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader)
                         /* Compute always recalculates uniforms. */
                         break;
 
+                case QUNIFORM_FB_LAYERS:
+                        dirty |= VC5_DIRTY_FRAMEBUFFER;
+                        break;
+
                 default:
                         assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i]));
                         dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX |
-- 
2.7.4