panfrost: Allocate RAM backing of shared memory
authorAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Thu, 6 Feb 2020 19:29:42 +0000 (14:29 -0500)
committerAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Sun, 16 Feb 2020 14:16:46 +0000 (09:16 -0500)
Unlike other GPUs, Mali does not have dedicated shared memory for
compute workloads. Instead, we allocate shared memory backed by RAM,
and the general memory access functions have modes for accessing shared
memory (essentially, think of these modes as computing base +
workgroup_id * stride in hardware). So let's allocate enough memory
based on the shared_size parameter and supply it.
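
To make the sizing concrete, here is the arithmetic from the
pan_compute.c hunk below, annotated (the hardware semantics of these
fields are reverse-engineered, and the worked numbers are only an
example):

    /* Pad each workgroup's slice to a power of two with a 128-byte
     * floor; shared_shift is then util_logbase2(single_size) - 1. */
    unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size, 128));

    /* Back every workgroup in the grid, with a 4x slack factor. E.g. a
     * 4x4x1 grid needing 200 bytes per workgroup rounds single_size up
     * to 256 and allocates 256 * 4 * 4 * 1 * 4 = 16 KiB; the descriptor
     * then gets shared_workgroup_count = 2 + 2 + 0 = 4 and
     * shared_shift = 7. */
    unsigned shared_size = single_size * info->grid[0] * info->grid[1] * info->grid[2] * 4;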

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3835>

src/gallium/drivers/panfrost/pan_assemble.c
src/gallium/drivers/panfrost/pan_compute.c
src/gallium/drivers/panfrost/pan_context.h
src/gallium/drivers/panfrost/pan_job.c
src/gallium/drivers/panfrost/pan_job.h

diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c
index 6f84248..31156c4 100644
@@ -124,6 +124,7 @@ panfrost_shader_compile(
                 /* TODO: images */
                 meta->attribute_count = 0;
                 meta->varying_count = 0;
+                state->shared_size = s->info.cs.shared_size;
                 break;
         default:
                 unreachable("Unknown shader state");
diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index f4c28c3..1901f58 100644
@@ -27,6 +27,7 @@
  */
 
 #include "pan_context.h"
+#include "pan_bo.h"
 #include "util/u_memory.h"
 #include "nir_serialize.h"
 
@@ -111,6 +112,8 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
         /* TODO: Stub */
         struct midgard_payload_vertex_tiler *payload = &ctx->payloads[PIPE_SHADER_COMPUTE];
+        struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
+        struct panfrost_shader_state *ss = &all->variants[all->active_variant];
 
         /* We implement OpenCL inputs as uniforms (or a UBO -- same thing), so
          * reuse the graphics path for this by lowering to Gallium */
@@ -127,8 +130,17 @@ panfrost_launch_grid(struct pipe_context *pipe,
 
         panfrost_emit_for_draw(ctx, false);
 
+        unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size, 128));
+        unsigned shared_size = single_size * info->grid[0] * info->grid[1] * info->grid[2] * 4;
+
         struct mali_shared_memory shared = {
-                .shared_workgroup_count = ~0
+                .shared_memory = panfrost_batch_get_shared_memory(batch, shared_size, 1)->gpu,
+                .shared_workgroup_count =
+                        util_logbase2_ceil(info->grid[0]) +
+                        util_logbase2_ceil(info->grid[1]) +
+                        util_logbase2_ceil(info->grid[2]),
+                .shared_unk1 = 0x2,
+                .shared_shift = util_logbase2(single_size) - 1
         };
 
         payload->postfix.shared_memory =
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index fcfcafb..b2736d4 100644
@@ -216,6 +216,7 @@ struct panfrost_shader_state {
         bool reads_face;
         bool reads_frag_coord;
         unsigned stack_size;
+        unsigned shared_size;
 
         struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS];
         gl_varying_slot varyings_loc[PIPE_MAX_ATTRIBS];
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 8f3acd2..fb9812d 100644
@@ -671,6 +671,24 @@ panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
 }
 
 struct panfrost_bo *
+panfrost_batch_get_shared_memory(struct panfrost_batch *batch,
+                unsigned size,
+                unsigned workgroup_count)
+{
+        if (batch->shared_memory) {
+                assert(batch->shared_memory->size >= size);
+        } else {
+                batch->shared_memory = panfrost_batch_create_bo(batch, size,
+                                             PAN_BO_INVISIBLE,
+                                             PAN_BO_ACCESS_PRIVATE |
+                                             PAN_BO_ACCESS_RW |
+                                             PAN_BO_ACCESS_VERTEX_TILER);
+        }
+
+        return batch->shared_memory;
+}
+
+struct panfrost_bo *
 panfrost_batch_get_tiler_heap(struct panfrost_batch *batch)
 {
         if (batch->tiler_heap)
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index ab2db01..55da645 100644
@@ -86,6 +86,9 @@ struct panfrost_batch {
         /* Amount of thread local storage required per thread */
         unsigned stack_size;
 
+        /* Amount of shared memory needed per workgroup (for compute) */
+        unsigned shared_size;
+
         /* Whether this job uses the corresponding requirement (PAN_REQ_*
          * bitmask) */
         unsigned requirements;
@@ -142,9 +145,12 @@ struct panfrost_batch {
         /* Polygon list bound to the batch, or NULL if none bound yet */
         struct panfrost_bo *polygon_list;
 
-        /* Scratchpath BO bound to the batch, or NULL if none bound yet */
+        /* Scratchpad BO bound to the batch, or NULL if none bound yet */
         struct panfrost_bo *scratchpad;
 
+        /* Shared memory BO bound to the batch, or NULL if none bound yet */
+        struct panfrost_bo *shared_memory;
+
         /* Tiler heap BO bound to the batch, or NULL if none bound yet */
         struct panfrost_bo *tiler_heap;
 
@@ -205,6 +211,9 @@ panfrost_batch_set_requirements(struct panfrost_batch *batch);
 struct panfrost_bo *
 panfrost_batch_get_scratchpad(struct panfrost_batch *batch, unsigned shift, unsigned thread_tls_alloc, unsigned core_count);
 
+struct panfrost_bo *
+panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size, unsigned workgroup_count);
+
 mali_ptr
 panfrost_batch_get_polygon_list(struct panfrost_batch *batch, unsigned size);