panfrost: Rename tiler fields per tiler research
authorAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Thu, 13 Jun 2019 17:25:32 +0000 (10:25 -0700)
committerAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Mon, 17 Jun 2019 14:47:49 +0000 (07:47 -0700)
Following the research into Midgard's hierarchical tiling
infrastructure, we now understand (in broad stokes) the purpose of each
tiler field in the MFBD. Additionally, we understand more of the tiling
fields in the SFBD and in Bifrost's structures, although this knowledge
is still incomplete.

Update the names, decoder, and comments to reflect this new
understanding.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
src/gallium/drivers/panfrost/include/panfrost-job.h
src/gallium/drivers/panfrost/pan_context.c
src/gallium/drivers/panfrost/pandecode/decode.c

index 401fef8..c7cb2d7 100644 (file)
@@ -963,7 +963,8 @@ struct bifrost_tiler_heap_meta {
 
 struct bifrost_tiler_meta {
         u64 zero0;
-        u32 unk; // = 0xf0
+        u16 hierarchy_mask;
+        u16 flags;
         u16 width;
         u16 height;
         u64 zero1;
@@ -1362,13 +1363,18 @@ struct mali_single_framebuffer {
 
         u32 zero6[7];
 
-        /* Very weird format, see generation code in trans_builder.c */
+        /* Logically, by symmetry to the MFBD, this ought to be the size of the
+         * polygon list. But this doesn't quite compute up. More investigation
+         * is needed. */
+
         u32 tiler_resolution_check;
-        u32 tiler_flags;
 
-        /* Guesses? */
-        mali_ptr tiler_scratch_start; /* Pointing towards... a zero buffer? */
-        mali_ptr tiler_scratch_middle;
+        u16 tiler_hierarchy_mask;
+        u16 tiler_flags;
+
+        /* See pan_tiler.c */
+        mali_ptr tiler_polygon_list; 
+        mali_ptr tiler_polygon_list_body;
 
         /* See mali_kbase_replay.c */
         mali_ptr tiler_heap_free;
@@ -1523,21 +1529,23 @@ struct bifrost_framebuffer {
 
 
         /* Tiler section begins here */
-        u32 tiler_unknown;
+        u32 tiler_polygon_list_size;
 
         /* Name known from the replay workaround in the kernel. What exactly is
-         * flagged here is less known. We do that (tiler_flags & 0x1ff)
+         * flagged here is less known. We do that (tiler_hierarchy_mask & 0x1ff)
          * specifies a mask of hierarchy weights, which explains some of the
-         * performance mysteries around setting it. We also known (1 << 16)
-         * should be set, but there's no explanation in the kernel why. */
-        u32 tiler_flags;
+         * performance mysteries around setting it. We also see the bottom bit
+         * of tiler_flags set in the kernel, but no comment why. */
+
+        u16 tiler_hierarchy_mask;
+        u16 tiler_flags;
 
-        /* Note: these are guesses! */
-        mali_ptr tiler_scratch_start;
-        mali_ptr tiler_scratch_middle;
+        /* See mali_tiler.c for an explanation */
+        mali_ptr tiler_polygon_list;
+        mali_ptr tiler_polygon_list_body;
 
-        /* These are not, since we see symmetry with replay
-         * jobs which name these explicitly */
+        /* Names based on we see symmetry with replay jobs which name these
+         * explicitly */
 
         mali_ptr tiler_heap_start; /* tiler heap_free_address */
         mali_ptr tiler_heap_end;
index 2a53087..009ff68 100644 (file)
@@ -118,9 +118,10 @@ panfrost_emit_sfbd(struct panfrost_context *ctx)
                 .format = 0x30000000,
                 .clear_flags = 0x1000,
                 .unknown_address_0 = ctx->scratchpad.gpu,
-                .tiler_scratch_start = ctx->misc_0.gpu,
-                .tiler_scratch_middle = ctx->misc_0.gpu + 40960,
-                .tiler_flags = 0xf0,
+                .tiler_polygon_list = ctx->misc_0.gpu,
+                .tiler_polygon_list_body = ctx->misc_0.gpu + 40960,
+                .tiler_hierarchy_mask = 0xF0,
+                .tiler_flags = 0x0,
                 .tiler_heap_free = ctx->tiler_heap.gpu,
                 .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
         };
@@ -134,22 +135,22 @@ struct bifrost_framebuffer
 panfrost_emit_mfbd(struct panfrost_context *ctx)
 {
         struct bifrost_framebuffer framebuffer = {
-                /* It is not yet clear what this means or how it's
-                 * calculated, but we can tell it is a (monotonically
-                 * increasing?) function of tile count and geometry complexity;
-                 * I suspect it defines a memory size of some kind? for the
-                 * tiler. It's really unclear at the moment... but to add to
-                 * the confusion, the hardware is happy enough to accept a zero
-                 * in this field, so we don't even have to worry about it right
-                 * now. */
-
-                .tiler_unknown = 0x0,
-
-                /* The lower 0xff controls the hierarchy mask. Set more bits
+                /* The lower 0x1ff controls the hierarchy mask. Set more bits
                  * on for more tile granularity (which can be a performance win
                  * on some scenes, at memory bandwidth costs). For now, be lazy
                  * and enable everything. This might be a terrible idea. */
-                .tiler_flags = 0xff,
+
+                .tiler_hierarchy_mask = 0xff,
+                .tiler_flags = 0x0,
+
+                /* The hardware deals with suballocation; we don't care */
+                .tiler_heap_start = ctx->tiler_heap.gpu,
+                .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
+
+                /* See pan_tiler.c */
+                .tiler_polygon_list  = ctx->misc_0.gpu,
+                .tiler_polygon_list_body = ctx->misc_0.gpu + 0xf0000,
+                .tiler_polygon_list_size = 0x0,
 
                 .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
                 .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height),
@@ -164,26 +165,7 @@ panfrost_emit_mfbd(struct panfrost_context *ctx)
 
                 .unknown2 = 0x1f,
 
-                /* Corresponds to unknown_address_X of SFBD */
                 .scratchpad = ctx->scratchpad.gpu,
-                .tiler_scratch_start  = ctx->misc_0.gpu,
-
-                /* The constant added here is, like the lower word of
-                 * tiler_meta, (loosely) another product of framebuffer size
-                 * and geometry complexity. It must be sufficiently large for
-                 * the tiler_meta fast path to work; if it's too small, there
-                 * will be DATA_INVALID_FAULTs. Conversely, it must be less
-                 * than the total size of misc_0, or else there's no room. It's
-                 * possible this constant configures a partition between two
-                 * parts of misc_0? We haven't investigated the functionality,
-                 * as these buffers are internally used by the hardware
-                 * (presumably by the tiler) but not seemingly touched by the driver
-                 */
-
-                .tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000,
-
-                .tiler_heap_start = ctx->tiler_heap.gpu,
-                .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
         };
 
         return framebuffer;
index 04de638..46cdef3 100644 (file)
@@ -463,11 +463,12 @@ pandecode_replay_sfbd(uint64_t gpu_va, int job_no)
         }
 
         MEMORY_PROP(s, unknown_address_0);
-        MEMORY_PROP(s, tiler_scratch_start);
-        MEMORY_PROP(s, tiler_scratch_middle);
+        MEMORY_PROP(s, tiler_polygon_list);
+        MEMORY_PROP(s, tiler_polygon_list_body);
 
         pandecode_prop("tiler_resolution_check = 0x%" PRIx32, s->tiler_resolution_check);
-        pandecode_prop("tiler_flags = 0x%" PRIx32, s->tiler_flags);
+        pandecode_prop("tiler_hierarchy_mask = 0x%" PRIx16, s->tiler_hierarchy_mask);
+        pandecode_prop("tiler_flags = 0x%" PRIx16, s->tiler_flags);
 
         MEMORY_PROP(s, tiler_heap_free);
         MEMORY_PROP(s, tiler_heap_end);
@@ -644,8 +645,9 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
          * now */
         MEMORY_PROP(fb, unknown1);
 
-        pandecode_prop("tiler_unknown = 0x%x", fb->tiler_unknown);
-        pandecode_prop("tiler_flags = 0x%x", fb->tiler_flags);
+        pandecode_prop("tiler_polygon_list_size = 0x%x", fb->tiler_polygon_list_size);
+        pandecode_prop("tiler_hierarchy_mask = 0x%" PRIx16, fb->tiler_hierarchy_mask);
+        pandecode_prop("tiler_flags = 0x%" PRIx16, fb->tiler_flags);
 
         pandecode_prop("width1 = MALI_POSITIVE(%d)", fb->width1 + 1);
         pandecode_prop("height1 = MALI_POSITIVE(%d)", fb->height1 + 1);
@@ -663,8 +665,8 @@ pandecode_replay_mfbd_bfr(uint64_t gpu_va, int job_no, bool with_render_targets)
 
         pandecode_prop("unknown2 = 0x%x", fb->unknown2);
         MEMORY_PROP(fb, scratchpad);
-        MEMORY_PROP(fb, tiler_scratch_start);
-        MEMORY_PROP(fb, tiler_scratch_middle);
+        MEMORY_PROP(fb, tiler_polygon_list);
+        MEMORY_PROP(fb, tiler_polygon_list_body);
         MEMORY_PROP(fb, tiler_heap_start);
         MEMORY_PROP(fb, tiler_heap_end);
 
@@ -1741,7 +1743,7 @@ pandecode_replay_tiler_meta(mali_ptr gpu_va, int job_no)
 
         pandecode_replay_tiler_heap_meta(t->tiler_heap_meta, job_no);
 
-        pandecode_log("struct mali_tiler_meta tiler_meta_%d = {\n", job_no);
+        pandecode_log("struct bifrost_tiler_meta tiler_meta_%d = {\n", job_no);
         pandecode_indent++;
 
         if (t->zero0 || t->zero1) {
@@ -1750,7 +1752,9 @@ pandecode_replay_tiler_meta(mali_ptr gpu_va, int job_no)
                 pandecode_prop("zero1 = 0x%" PRIx64, t->zero1);
         }
 
-        pandecode_prop("unk = 0x%x", t->unk);
+        pandecode_prop("hierarchy_mask = 0x%" PRIx16, t->hierarchy_mask);
+        pandecode_prop("flags = 0x%" PRIx16, t->flags);
+
         pandecode_prop("width = MALI_POSITIVE(%d)", t->width + 1);
         pandecode_prop("height = MALI_POSITIVE(%d)", t->height + 1);
         DYN_MEMORY_PROP(t, job_no, tiler_heap_meta);