i965: Add support for gl_BaseVertexARB and gl_BaseInstanceARB
authorKristian Høgsberg Kristensen <krh@bitplanet.net>
Thu, 10 Dec 2015 20:24:50 +0000 (12:24 -0800)
committerKristian Høgsberg Kristensen <krh@bitplanet.net>
Tue, 29 Dec 2015 18:39:25 +0000 (10:39 -0800)
We already have gl_BaseVertexARB in the .x component of the SGVS vec4
and plug gl_BaseInstanceARB into the last free component (.y).

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
src/mesa/drivers/dri/i965/brw_compiler.h
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_draw.c
src/mesa/drivers/dri/i965/brw_draw_upload.c
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
src/mesa/drivers/dri/i965/gen8_draw_upload.c

index e66deb1..9b3bb9f 100644 (file)
@@ -595,6 +595,8 @@ struct brw_vs_prog_data {
 
    bool uses_vertexid;
    bool uses_instanceid;
+   bool uses_basevertex;
+   bool uses_baseinstance;
 };
 
 struct brw_tcs_prog_data
index 0239b62..4cbe585 100644 (file)
@@ -909,8 +909,13 @@ struct brw_context
    uint32_t pma_stall_bits;
 
    struct {
-      /** The value of gl_BaseVertex for the current _mesa_prim. */
-      int gl_basevertex;
+      struct {
+         /** The value of gl_BaseVertex for the current _mesa_prim. */
+         int gl_basevertex;
+
+         /** The value of gl_BaseInstance for the current _mesa_prim. */
+         int gl_baseinstance;
+      } params;
 
       /**
        * Buffer and offset used for GL_ARB_shader_draw_parameters
index 8398471..e0665d3 100644 (file)
@@ -491,9 +491,9 @@ brw_try_draw_prims(struct gl_context *ctx,
          }
       }
 
-      brw->draw.gl_basevertex =
+      brw->draw.params.gl_basevertex =
          prims[i].indexed ? prims[i].basevertex : prims[i].start;
-
+      brw->draw.params.gl_baseinstance = prims[i].base_instance;
       drm_intel_bo_unreference(brw->draw.draw_params_bo);
 
       if (prims[i].is_indirect) {
index ea0f6f2..ccf963c 100644 (file)
@@ -592,8 +592,10 @@ void
 brw_prepare_shader_draw_parameters(struct brw_context *brw)
 {
    /* For non-indirect draws, upload gl_BaseVertex. */
-   if (brw->vs.prog_data->uses_vertexid && brw->draw.draw_params_bo == NULL) {
-      intel_upload_data(brw, &brw->draw.gl_basevertex, 4, 4,
+   if ((brw->vs.prog_data->uses_basevertex ||
+        brw->vs.prog_data->uses_baseinstance) &&
+       brw->draw.draw_params_bo == NULL) {
+      intel_upload_data(brw, &brw->draw.params, sizeof(brw->draw.params), 4,
                        &brw->draw.draw_params_bo,
                         &brw->draw.draw_params_offset);
    }
@@ -658,7 +660,8 @@ brw_emit_vertices(struct brw_context *brw)
    brw_emit_query_begin(brw);
 
    unsigned nr_elements = brw->vb.nr_enabled;
-   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid)
+   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid ||
+       brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance)
       ++nr_elements;
 
    /* If the VS doesn't read any inputs (calculating vertex position from
@@ -693,8 +696,10 @@ brw_emit_vertices(struct brw_context *brw)
    /* Now emit VB and VEP state packets.
     */
 
-   unsigned nr_buffers =
-      brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;
+   const bool uses_draw_params =
+      brw->vs.prog_data->uses_basevertex ||
+      brw->vs.prog_data->uses_baseinstance;
+   const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params;
 
    if (nr_buffers) {
       if (brw->gen >= 6) {
@@ -713,7 +718,7 @@ brw_emit_vertices(struct brw_context *brw)
 
       }
 
-      if (brw->vs.prog_data->uses_vertexid) {
+      if (uses_draw_params) {
          EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers,
                                   brw->draw.draw_params_bo,
                                   brw->draw.draw_params_bo->size - 1,
@@ -790,21 +795,25 @@ brw_emit_vertices(struct brw_context *brw)
                     ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
    }
 
-   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
+   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid ||
+       brw->vs.prog_data->uses_basevertex || brw->vs.prog_data->uses_baseinstance) {
       uint32_t dw0 = 0, dw1 = 0;
       uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
       uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0;
       uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0;
       uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0;
 
-      if (brw->vs.prog_data->uses_vertexid) {
+      if (brw->vs.prog_data->uses_basevertex)
          comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+
+      if (brw->vs.prog_data->uses_baseinstance)
+         comp1 = BRW_VE1_COMPONENT_STORE_SRC;
+
+      if (brw->vs.prog_data->uses_vertexid)
          comp2 = BRW_VE1_COMPONENT_STORE_VID;
-      }
 
-      if (brw->vs.prog_data->uses_instanceid) {
+      if (brw->vs.prog_data->uses_instanceid)
          comp3 = BRW_VE1_COMPONENT_STORE_IID;
-      }
 
       dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
             (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
@@ -814,11 +823,11 @@ brw_emit_vertices(struct brw_context *brw)
       if (brw->gen >= 6) {
          dw0 |= GEN6_VE0_VALID |
                 brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
-                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
+                BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
       } else {
          dw0 |= BRW_VE0_VALID |
                 brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT |
-                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
+                BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT;
         dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
       }
 
index 6ac2f85..8235ce7 100644 (file)
@@ -1671,7 +1671,8 @@ fs_visitor::assign_vs_urb_setup()
 
    assert(stage == MESA_SHADER_VERTEX);
    int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
-   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
+   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
+       vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance)
       count++;
 
    /* Each attribute is 4 regs. */
index bb6ab53..5b901a0 100644 (file)
@@ -222,6 +222,13 @@ emit_system_values_block(nir_block *block, void *void_visitor)
             *reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
          break;
 
+      case nir_intrinsic_load_base_instance:
+         assert(v->stage == MESA_SHADER_VERTEX);
+         reg = &v->nir_system_values[SYSTEM_VALUE_BASE_INSTANCE];
+         if (reg->file == BAD_FILE)
+            *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_INSTANCE);
+         break;
+
       case nir_intrinsic_load_invocation_id:
          assert(v->stage == MESA_SHADER_GEOMETRY);
          reg = &v->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
@@ -1747,7 +1754,8 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
 
    case nir_intrinsic_load_vertex_id_zero_base:
    case nir_intrinsic_load_base_vertex:
-   case nir_intrinsic_load_instance_id: {
+   case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_base_instance: {
       gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
       fs_reg val = nir_system_values[sv];
       assert(val.file != BAD_FILE);
index 7036ad8..d6941fa 100644 (file)
@@ -43,7 +43,11 @@ fs_visitor::emit_vs_system_value(int location)
    switch (location) {
    case SYSTEM_VALUE_BASE_VERTEX:
       reg->reg_offset = 0;
-      vs_prog_data->uses_vertexid = true;
+      vs_prog_data->uses_basevertex = true;
+      break;
+   case SYSTEM_VALUE_BASE_INSTANCE:
+      reg->reg_offset = 1;
+      vs_prog_data->uses_baseinstance = true;
       break;
    case SYSTEM_VALUE_VERTEX_ID:
       unreachable("should have been lowered");
index f0f18ca..b2a27d8 100644 (file)
@@ -1581,7 +1581,8 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
     * don't represent it with a flag in inputs_read, so we call it
     * VERT_ATTRIB_MAX.
     */
-   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
+   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
+       vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) {
       attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
    }
 
@@ -1982,7 +1983,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
     * incoming vertex attribute.  So, add an extra slot.
     */
    if (shader->info.system_values_read &
-       (BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
+       (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
+        BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
+        BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
         BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
       nr_attributes++;
    }
index ab71304..c20da9b 100644 (file)
@@ -78,6 +78,13 @@ vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
                                            glsl_type::int_type);
       break;
 
+   case nir_intrinsic_load_base_instance:
+      reg = &nir_system_values[SYSTEM_VALUE_BASE_INSTANCE];
+      if (reg->file == BAD_FILE)
+         *reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_INSTANCE,
+                                           glsl_type::int_type);
+      break;
+
    default:
       break;
    }
@@ -669,6 +676,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
    case nir_intrinsic_load_vertex_id_zero_base:
    case nir_intrinsic_load_base_vertex:
    case nir_intrinsic_load_instance_id:
+   case nir_intrinsic_load_base_instance:
    case nir_intrinsic_load_invocation_id:
    case nir_intrinsic_load_tess_level_inner:
    case nir_intrinsic_load_tess_level_outer: {
index fd8be7d..bd6a9a4 100644 (file)
@@ -155,7 +155,11 @@ vec4_vs_visitor::make_reg_for_system_value(int location,
    switch (location) {
    case SYSTEM_VALUE_BASE_VERTEX:
       reg->writemask = WRITEMASK_X;
-      vs_prog_data->uses_vertexid = true;
+      vs_prog_data->uses_basevertex = true;
+      break;
+   case SYSTEM_VALUE_BASE_INSTANCE:
+      reg->writemask = WRITEMASK_Y;
+      vs_prog_data->uses_baseinstance = true;
       break;
    case SYSTEM_VALUE_VERTEX_ID:
    case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
index 198d612..451cf0b 100644 (file)
@@ -115,7 +115,11 @@ gen8_emit_vertices(struct brw_context *brw)
    }
 
    /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
-   unsigned nr_buffers = brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;
+   const bool uses_draw_params =
+      brw->vs.prog_data->uses_basevertex ||
+      brw->vs.prog_data->uses_baseinstance;
+   const unsigned nr_buffers = brw->vb.nr_buffers + uses_draw_params;
+
    if (nr_buffers) {
       assert(nr_buffers <= 33);
 
@@ -135,7 +139,7 @@ gen8_emit_vertices(struct brw_context *brw)
          OUT_BATCH(buffer->bo->size);
       }
 
-      if (brw->vs.prog_data->uses_vertexid) {
+      if (uses_draw_params) {
          OUT_BATCH(brw->vb.nr_buffers << GEN6_VB0_INDEX_SHIFT |
                    GEN7_VB0_ADDRESS_MODIFYENABLE |
                    mocs_wb << 16);
@@ -148,16 +152,18 @@ gen8_emit_vertices(struct brw_context *brw)
 
    /* Normally we don't need an element for the SGVS attribute because the
     * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
-    * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the
-    * vertex ID is used then it needs an element for the base vertex buffer.
-    * Additionally if there is an edge flag element then the SGVS can't be
-    * inserted past that so we need a dummy element to ensure that the edge
-    * flag is the last one.
+    * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if
+    * we're using draw parameters then we need an element for the those
+    * values.  Additionally if there is an edge flag element then the SGVS
+    * can't be inserted past that so we need a dummy element to ensure that
+    * the edge flag is the last one.
     */
-   bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid ||
-                              (brw->vs.prog_data->uses_instanceid &&
-                               uses_edge_flag));
-   unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;
+   const bool needs_sgvs_element = (brw->vs.prog_data->uses_basevertex ||
+                                    brw->vs.prog_data->uses_baseinstance ||
+                                    ((brw->vs.prog_data->uses_instanceid ||
+                                      brw->vs.prog_data->uses_vertexid) &&
+                                     uses_edge_flag));
+   const unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;
 
    /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
     * presumably for VertexID/InstanceID.
@@ -212,12 +218,13 @@ gen8_emit_vertices(struct brw_context *brw)
    }
 
    if (needs_sgvs_element) {
-      if (brw->vs.prog_data->uses_vertexid) {
+      if (brw->vs.prog_data->uses_basevertex ||
+          brw->vs.prog_data->uses_baseinstance) {
          OUT_BATCH(GEN6_VE0_VALID |
                    brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
-                   BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
+                   BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT);
          OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
-                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT) |
                    (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                    (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
       } else {