i965/cs: Implement DispatchComputeIndirect support
author: Jordan Justen <jordan.l.justen@intel.com>
Thu, 17 Sep 2015 23:25:24 +0000 (16:25 -0700)
committer: Jordan Justen <jordan.l.justen@intel.com>
Fri, 25 Sep 2015 02:15:13 +0000 (19:15 -0700)
Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
src/mesa/drivers/dri/i965/brw_compute.c
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/intel_reg.h

index 5693ab5..c392152 100644 (file)
 #include "brw_draw.h"
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
 #include "brw_defines.h"
 
 
 static void
-brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
+brw_emit_gpgpu_walker(struct brw_context *brw,
+                      const void *compute_param,
+                      bool indirect)
 {
    const struct brw_cs_prog_data *prog_data = brw->cs.prog_data;
 
+   const GLuint *num_groups;
+   uint32_t indirect_flag;
+
+   if (!indirect) {
+      num_groups = (const GLuint *)compute_param;
+      indirect_flag = 0;
+   } else {
+      GLintptr indirect_offset = (GLintptr)compute_param;
+      static const GLuint indirect_group_counts[3] = { 0, 0, 0 };
+      num_groups = indirect_group_counts;
+
+      struct gl_buffer_object *indirect_buffer = brw->ctx.DispatchIndirectBuffer;
+      drm_intel_bo *bo = intel_bufferobj_buffer(brw,
+            intel_buffer_object(indirect_buffer),
+            indirect_offset, 3 * sizeof(GLuint));
+
+      indirect_flag = GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE;
+
+      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMX, bo,
+                            I915_GEM_DOMAIN_VERTEX, 0,
+                            indirect_offset + 0);
+      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMY, bo,
+                            I915_GEM_DOMAIN_VERTEX, 0,
+                            indirect_offset + 4);
+      brw_load_register_mem(brw, GEN7_GPGPU_DISPATCHDIMZ, bo,
+                            I915_GEM_DOMAIN_VERTEX, 0,
+                            indirect_offset + 8);
+   }
+
    const unsigned simd_size = prog_data->simd_size;
    unsigned group_size = prog_data->local_size[0] *
       prog_data->local_size[1] * prog_data->local_size[2];
@@ -52,7 +84,7 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
 
    uint32_t dwords = brw->gen < 8 ? 11 : 15;
    BEGIN_BATCH(dwords);
-   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2));
+   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
    OUT_BATCH(0);
    if (brw->gen >= 8) {
       OUT_BATCH(0);                     /* Indirect Data Length */
@@ -83,7 +115,9 @@ brw_emit_gpgpu_walker(struct brw_context *brw, const GLuint *num_groups)
 
 
 static void
-brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
+brw_dispatch_compute_common(struct gl_context *ctx,
+                            const void *compute_param,
+                            bool indirect)
 {
    struct brw_context *brw = brw_context(ctx);
    int estimated_buffer_space_needed;
@@ -117,7 +151,7 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
    brw->no_batch_wrap = true;
    brw_upload_compute_state(brw);
 
-   brw_emit_gpgpu_walker(brw, num_groups);
+   brw_emit_gpgpu_walker(brw, compute_param, indirect);
 
    brw->no_batch_wrap = false;
 
@@ -155,9 +189,24 @@ brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
     */
 }
 
+/* Direct DispatchCompute hook: hands num_groups to the common path as-is. */
+static void
+brw_dispatch_compute(struct gl_context *ctx, const GLuint *num_groups)
+{
+   brw_dispatch_compute_common(ctx, num_groups, false);
+}
+
+/* DispatchComputeIndirect hook: the offset travels as the pointer param. */
+static void
+brw_dispatch_compute_indirect(struct gl_context *ctx, GLintptr indirect)
+{
+   void *offset_as_param = (void *)indirect;
+   brw_dispatch_compute_common(ctx, offset_as_param, true);
+}
 
 void
 brw_init_compute_functions(struct dd_function_table *functions)
 {
    functions->DispatchCompute = brw_dispatch_compute;
+   functions->DispatchComputeIndirect = brw_dispatch_compute_indirect;
 }
index f9d8d1b..f079798 100644 (file)
@@ -2770,6 +2770,8 @@ enum brw_wm_barycentric_interp_mode {
 # define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK     INTEL_MASK(9, 0)
 #define MEDIA_STATE_FLUSH                       0x7004
 #define GPGPU_WALKER                            0x7105
+/* GEN7 DW0 */
+# define GEN7_GPGPU_INDIRECT_PARAMETER_ENABLE   (1 << 10)
 /* GEN8+ DW2 */
 # define GPGPU_WALKER_INDIRECT_LENGTH_SHIFT     0
 # define GPGPU_WALKER_INDIRECT_LENGTH_MASK      INTEL_MASK(15, 0)
index 58007d3..a261c2b 100644 (file)
 #define GEN7_3DPRIM_START_INSTANCE      0x243C
 #define GEN7_3DPRIM_BASE_VERTEX         0x2440
 
+/* Auto-Compute / Indirect Registers */
+#define GEN7_GPGPU_DISPATCHDIMX         0x2500
+#define GEN7_GPGPU_DISPATCHDIMY         0x2504
+#define GEN7_GPGPU_DISPATCHDIMZ         0x2508
+
 #define GEN7_CACHE_MODE_1               0x7004
 # define GEN8_HIZ_NP_PMA_FIX_ENABLE        (1 << 11)
 # define GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE (1 << 13)