pan/bi: Add support for gl_{BaseVertex,BaseInstance}
author: Boris Brezillon <boris.brezillon@collabora.com>
Tue, 11 May 2021 09:16:46 +0000 (11:16 +0200)
committer: Marge Bot <eric+marge@anholt.net>
Wed, 12 May 2021 07:03:51 +0000 (07:03 +0000)
Extend the VERTEX_INSTANCE_OFFSETS sysval to pass
BaseVertex/BaseInstance information to the shader.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10417>

src/gallium/drivers/panfrost/pan_cmdstream.c
src/gallium/drivers/panfrost/pan_context.c
src/gallium/drivers/panfrost/pan_context.h
src/gallium/drivers/panfrost/pan_screen.c
src/panfrost/bifrost/bifrost_compile.c
src/panfrost/lib/pan_indirect_draw.c
src/panfrost/lib/pan_indirect_draw.h
src/panfrost/util/pan_sysval.c

index 1d5a4a6..930d37e 100644 (file)
@@ -1083,8 +1083,14 @@ panfrost_upload_sysvals(struct panfrost_batch *batch,
                 case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
                         batch->ctx->first_vertex_sysval_ptr =
                                 ptr->gpu + (i * sizeof(*uniforms));
+                        batch->ctx->base_vertex_sysval_ptr =
+                                batch->ctx->first_vertex_sysval_ptr + 4;
+                        batch->ctx->base_instance_sysval_ptr =
+                                batch->ctx->first_vertex_sysval_ptr + 8;
 
                         uniforms[i].u[0] = batch->ctx->offset_start;
+                        uniforms[i].u[1] = batch->ctx->base_vertex;
+                        uniforms[i].u[2] = batch->ctx->base_instance;
                         break;
                 default:
                         assert(0);
@@ -1200,6 +1206,12 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
                                 case 0:
                                         batch->ctx->first_vertex_sysval_ptr = ptr;
                                         break;
+                                case 1:
+                                        batch->ctx->base_vertex_sysval_ptr = ptr;
+                                        break;
+                                case 2:
+                                        batch->ctx->base_instance_sysval_ptr = ptr;
+                                        break;
                                 default:
                                         unreachable("Invalid vertex/instance offset component\n");
                                 }
@@ -1730,6 +1742,12 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
                 /* BOs aligned to 4k so guaranteed aligned to 64 */
                 src_offset += (buf->buffer_offset & 63);
 
+                /* Base instance offset */
+                if (ctx->base_instance && so->pipe[i].instance_divisor) {
+                        src_offset += (ctx->base_instance * buf->stride) /
+                                      so->pipe[i].instance_divisor;
+                }
+
                 /* Also, somewhat obscurely per-instance data needs to be
                  * offset in response to a delayed start in an indexed draw */
 
index 98cf726..fe06b5a 100644 (file)
@@ -450,6 +450,8 @@ panfrost_direct_draw(struct panfrost_context *ctx,
         ctx->indirect_draw = false;
         ctx->vertex_count = draw->count + (info->index_size ? abs(draw->index_bias) : 0);
         ctx->instance_count = info->instance_count;
+        ctx->base_vertex = info->index_size ? draw->index_bias : 0;
+        ctx->base_instance = info->start_instance;
         ctx->active_prim = info->mode;
 
         struct panfrost_ptr tiler =
@@ -613,6 +615,9 @@ panfrost_indirect_draw(struct panfrost_context *ctx,
          * vertex shader uses gl_VertexID or gl_BaseVertex.
          */
         ctx->first_vertex_sysval_ptr = 0;
+        ctx->base_vertex_sysval_ptr = 0;
+        ctx->base_instance_sysval_ptr = 0;
+
         bool point_coord_replace = (info->mode == PIPE_PRIM_POINTS);
 
         panfrost_emit_varying_descriptor(batch, 0,
@@ -660,6 +665,8 @@ panfrost_indirect_draw(struct panfrost_context *ctx,
                 .draw_buf = draw_buf->image.data.bo->ptr.gpu + indirect->offset,
                 .index_buf = index_buf ? index_buf->ptr.gpu : 0,
                 .first_vertex_sysval = ctx->first_vertex_sysval_ptr,
+                .base_vertex_sysval = ctx->base_vertex_sysval_ptr,
+                .base_instance_sysval = ctx->base_instance_sysval_ptr,
                 .vertex_job = vertex.gpu,
                 .tiler_job = tiler.gpu,
                 .attrib_bufs = attrib_bufs,
index de2ac6c..b825b13 100644 (file)
@@ -137,7 +137,11 @@ struct panfrost_context {
         unsigned vertex_count;
         unsigned instance_count;
         unsigned offset_start;
+        unsigned base_vertex;
+        unsigned base_instance;
         mali_ptr first_vertex_sysval_ptr;
+        mali_ptr base_vertex_sysval_ptr;
+        mali_ptr base_instance_sysval_ptr;
         enum pipe_prim_type active_prim;
 
         /* If instancing is enabled, vertex count padded for instance; if
index 12ae7fc..d8dffc4 100644 (file)
@@ -310,6 +310,9 @@ panfrost_get_param(struct pipe_screen *screen, enum pipe_cap param)
         case PIPE_CAP_DRAW_INDIRECT:
                 return has_heap && is_deqp;
 
+        case PIPE_CAP_START_INSTANCE:
+                return pan_is_bifrost(dev) && is_deqp;
+
         default:
                 return u_pipe_screen_get_param_defaults(screen, param);
         }
index 0045a8f..98fd1d3 100644 (file)
@@ -1158,6 +1158,14 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
                 bi_load_sysval_nir(b, instr, 1, 0);
                 break;
 
+        case nir_intrinsic_load_base_vertex:
+                bi_load_sysval_nir(b, instr, 1, 4);
+                break;
+
+        case nir_intrinsic_load_base_instance:
+                bi_load_sysval_nir(b, instr, 1, 8);
+                break;
+
         case nir_intrinsic_get_ssbo_size:
                 bi_load_sysval_nir(b, instr, 1, 8);
                 break;
index 78a5920..16721c4 100644 (file)
@@ -55,6 +55,7 @@ struct draw_data {
         nir_ssa_def *index_buf;
         nir_ssa_def *restart_index;
         nir_ssa_def *vertex_count;
+        nir_ssa_def *start_instance;
         nir_ssa_def *instance_count;
         nir_ssa_def *vertex_start;
         nir_ssa_def *index_bias;
@@ -73,6 +74,8 @@ struct jobs_data {
         nir_ssa_def *tiler_job;
         nir_ssa_def *base_vertex_offset;
         nir_ssa_def *first_vertex_sysval;
+        nir_ssa_def *base_vertex_sysval;
+        nir_ssa_def *base_instance_sysval;
         nir_ssa_def *offset_start;
         nir_ssa_def *invocation;
 };
@@ -160,6 +163,9 @@ struct indirect_draw_inputs {
 
         /* {base,first}_{vertex,instance} sysvals */
         mali_ptr first_vertex_sysval;
+        mali_ptr base_vertex_sysval;
+        mali_ptr base_instance_sysval;
+
         /* Pointers to various cmdstream structs that need to be patched */
         mali_ptr vertex_job;
         mali_ptr tiler_job;
@@ -318,6 +324,8 @@ extract_inputs(struct indirect_draw_shader_builder *builder)
                 return;
 
         builder->jobs.first_vertex_sysval = get_input_field(b, first_vertex_sysval);
+        builder->jobs.base_vertex_sysval = get_input_field(b, base_vertex_sysval);
+        builder->jobs.base_instance_sysval = get_input_field(b, base_instance_sysval);
         builder->jobs.vertex_job = get_input_field(b, vertex_job);
         builder->jobs.tiler_job = get_input_field(b, tiler_job);
         builder->attribs.attrib_bufs = get_input_field(b, attrib_bufs);
@@ -506,7 +514,8 @@ update_vertex_attrib_buf(struct indirect_draw_shader_builder *builder,
 
 static void
 adjust_attrib_offset(struct indirect_draw_shader_builder *builder,
-                     nir_ssa_def *attrib_ptr, nir_ssa_def *attrib_buf_ptr)
+                     nir_ssa_def *attrib_ptr, nir_ssa_def *attrib_buf_ptr,
+                     nir_ssa_def *instance_div)
 {
         nir_builder *b = &builder->b;
         nir_ssa_def *zero = nir_imm_int(b, 0);
@@ -515,18 +524,34 @@ adjust_attrib_offset(struct indirect_draw_shader_builder *builder,
                 nir_iand(b, nir_ine(b, builder->jobs.offset_start, zero),
                          nir_ige(b, builder->draw.instance_count, two));
 
-        IF (sub_cur_offset) {
+        nir_ssa_def *add_base_inst_offset =
+                nir_iand(b, nir_ine(b, builder->draw.start_instance, zero),
+                         nir_ine(b, instance_div, zero));
+
+        IF (nir_ior(b, sub_cur_offset, add_base_inst_offset)) {
+                nir_ssa_def *offset =
+                        load_global(b, get_address_imm(b, attrib_ptr, WORD(1)), 1, 32);
+                nir_ssa_def *stride =
+                        load_global(b, get_address_imm(b, attrib_buf_ptr, WORD(2)), 1, 32);
+
                 /* Per-instance data needs to be offset in response to a
                  * delayed start in an indexed draw.
                  */
-                nir_ssa_def *stride =
-                        load_global(b, get_address_imm(b, attrib_buf_ptr, WORD(2)), 1, 32);
-                nir_ssa_def *offset =
-                        load_global(b, get_address_imm(b, attrib_ptr, WORD(1)), 1, 32);
 
-                offset = nir_isub(b, offset,
-                                  nir_imul(b, stride,
-                                  builder->jobs.offset_start));
+                IF (add_base_inst_offset) {
+                        offset = nir_iadd(b, offset,
+                                          nir_idiv(b,
+                                                   nir_imul(b, stride,
+                                                            builder->draw.start_instance),
+                                                   instance_div));
+                } ENDIF
+
+                IF (sub_cur_offset) {
+                        offset = nir_isub(b, offset,
+                                          nir_imul(b, stride,
+                                                   builder->jobs.offset_start));
+                } ENDIF
+
                 store_global(b, get_address_imm(b, attrib_ptr, WORD(1)),
                              offset, 1);
         } ENDIF
@@ -600,10 +625,10 @@ update_vertex_attribs(struct indirect_draw_shader_builder *builder)
                         } ENDIF
                 }
 
-                nir_ssa_def *div =
+                nir_ssa_def *instance_div =
                         load_global(b, get_address_imm(b, attrib_buf_ptr, WORD(7)), 1, 32);
 
-                div = nir_imul(b, div, builder->instance_size.padded);
+                nir_ssa_def *div = nir_imul(b, instance_div, builder->instance_size.padded);
 
                 nir_ssa_def *multi_instance =
                         nir_ige(b, builder->draw.instance_count, nir_imm_int(b, 2));
@@ -636,7 +661,7 @@ update_vertex_attribs(struct indirect_draw_shader_builder *builder)
                                                          nir_imm_int(b, 31), NULL);
                         } ENDIF
 
-                        adjust_attrib_offset(builder, attrib_ptr, attrib_buf_ptr);
+                        adjust_attrib_offset(builder, attrib_ptr, attrib_buf_ptr, instance_div);
                 } ELSE {
                         IF (multi_instance) {
                                 update_vertex_attrib_buf(builder, attrib_buf_ptr,
@@ -888,12 +913,14 @@ patch(struct indirect_draw_shader_builder *builder)
 
         if (index_size) {
                 builder->draw.vertex_count = get_indexed_draw_field(b, draw_ptr, count);
+                builder->draw.start_instance = get_indexed_draw_field(b, draw_ptr, start_instance);
                 builder->draw.instance_count =
                         get_indexed_draw_field(b, draw_ptr, instance_count);
                 builder->draw.vertex_start = get_indexed_draw_field(b, draw_ptr, start);
                 builder->draw.index_bias = get_indexed_draw_field(b, draw_ptr, index_bias);
         } else {
                 builder->draw.vertex_count = get_draw_field(b, draw_ptr, count);
+                builder->draw.start_instance = get_draw_field(b, draw_ptr, start_instance);
                 builder->draw.instance_count = get_draw_field(b, draw_ptr, instance_count);
                 builder->draw.vertex_start = get_draw_field(b, draw_ptr, start);
         }
@@ -914,6 +941,20 @@ patch(struct indirect_draw_shader_builder *builder)
                 store_global(b, builder->jobs.first_vertex_sysval,
                              builder->jobs.offset_start, 1);
         } ENDIF
+
+        IF (nir_ine(b, builder->jobs.base_vertex_sysval, nir_imm_int64(b, 0))) {
+                store_global(b, builder->jobs.base_vertex_sysval,
+                             index_size ?
+                             builder->draw.index_bias :
+                             nir_imm_int(b, 0),
+                             1);
+        } ENDIF
+
+        IF (nir_ine(b, builder->jobs.base_instance_sysval, nir_imm_int64(b, 0))) {
+                store_global(b, builder->jobs.base_instance_sysval,
+                             builder->draw.start_instance, 1);
+        } ENDIF
+
 }
 
 /* Search the min/max index in the range covered by the indirect draw call */
@@ -1255,6 +1296,8 @@ panfrost_emit_indirect_draw(struct pan_pool *pool,
                 .draw_buf = draw_info->draw_buf,
                 .index_buf = draw_info->index_buf,
                 .first_vertex_sysval = draw_info->first_vertex_sysval,
+                .base_vertex_sysval = draw_info->base_vertex_sysval,
+                .base_instance_sysval = draw_info->base_instance_sysval,
                 .vertex_job = draw_info->vertex_job,
                 .tiler_job = draw_info->tiler_job,
                 .attrib_bufs = draw_info->attrib_bufs,
index f20d10a..773bb8f 100644 (file)
@@ -32,6 +32,8 @@ struct pan_indirect_draw_info {
         mali_ptr draw_buf;
         mali_ptr index_buf;
         mali_ptr first_vertex_sysval;
+        mali_ptr base_vertex_sysval;
+        mali_ptr base_instance_sysval;
         mali_ptr vertex_job;
         mali_ptr tiler_job;
         mali_ptr attrib_bufs;
index 7d17cf0..128183a 100644 (file)
@@ -79,6 +79,8 @@ panfrost_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr)
         case nir_intrinsic_load_sample_positions_pan:
                 return PAN_SYSVAL_SAMPLE_POSITIONS;
         case nir_intrinsic_load_first_vertex:
+        case nir_intrinsic_load_base_vertex:
+        case nir_intrinsic_load_base_instance:
                 return PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS;
         case nir_intrinsic_load_ssbo_address: 
         case nir_intrinsic_get_ssbo_size: