i965: Enable shared local memory for CS shared variables
author: Jordan Justen <jordan.l.justen@intel.com>
Tue, 28 Jul 2015 22:30:30 +0000 (15:30 -0700)
committer: Jordan Justen <jordan.l.justen@intel.com>
Thu, 10 Dec 2015 07:50:38 +0000 (23:50 -0800)
v3:
 * Check shared variable size at link time

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
src/mesa/drivers/dri/i965/brw_cs.c
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/gen7_cs_state.c

index 9ef9bd4..d88e822 100644 (file)
@@ -107,6 +107,19 @@ brw_codegen_cs_prog(struct brw_context *brw,
 
    memset(&prog_data, 0, sizeof(prog_data));
 
+   if (prog->Comp.SharedSize > 64 * 1024) {
+      prog->LinkStatus = false;
+      const char *error_str =
+         "Compute shader used more than 64KB of shared variables";
+      ralloc_strcat(&prog->InfoLog, error_str);
+      _mesa_problem(NULL, "Failed to link compute shader: %s\n", error_str);
+
+      ralloc_free(mem_ctx);
+      return false;
+   } else {
+      prog_data.base.total_shared = prog->Comp.SharedSize;
+   }
+
    assign_cs_binding_table_offsets(brw->intelScreen->devinfo, prog,
                                    &cp->program.Base, &prog_data);
 
index ba6290d..4a184cf 100644 (file)
@@ -2884,6 +2884,8 @@ enum brw_wm_barycentric_interp_mode {
 /* GEN7 DW5, GEN8+ DW6 */
 # define MEDIA_BARRIER_ENABLE_SHIFT             21
 # define MEDIA_BARRIER_ENABLE_MASK              INTEL_MASK(21, 21)
+# define MEDIA_SHARED_LOCAL_MEMORY_SIZE_SHIFT   16
+# define MEDIA_SHARED_LOCAL_MEMORY_SIZE_MASK    INTEL_MASK(20, 16)
 # define MEDIA_GPGPU_THREAD_COUNT_SHIFT         0
 # define MEDIA_GPGPU_THREAD_COUNT_MASK          INTEL_MASK(7, 0)
 # define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT    0
index 0922c22..1fde69c 100644 (file)
@@ -166,8 +166,20 @@ brw_upload_cs_state(struct brw_context *brw)
       SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
       SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
    assert(threads <= brw->max_cs_threads);
+
+   assert(prog_data->total_shared <= 64 * 1024);
+   uint32_t slm_size = 0;
+   if (prog_data->total_shared > 0) {
+      /* slm_size is in 4k increments, but must be a power of 2. */
+      slm_size = 4 * 1024;
+      while (slm_size < prog_data->total_shared)
+         slm_size <<= 1;
+      slm_size /= 4 * 1024;
+   }
+
    desc[dw++] =
       SET_FIELD(cs_prog_data->uses_barrier, MEDIA_BARRIER_ENABLE) |
+      SET_FIELD(slm_size, MEDIA_SHARED_LOCAL_MEMORY_SIZE) |
       media_threads;
 
    BEGIN_BATCH(4);