broadcom/vc5: Use supertiles and generic tile lists.
author Eric Anholt <eric@anholt.net>
Wed, 27 Sep 2017 22:27:31 +0000 (15:27 -0700)
committer Eric Anholt <eric@anholt.net>
Tue, 10 Oct 2017 18:42:05 +0000 (11:42 -0700)
This massively reduces the size of our RCL setup.  It also gets us closer
to supporting multicore platforms.

src/gallium/drivers/vc5/vc5_cl.h
src/gallium/drivers/vc5/vc5_rcl.c
src/gallium/drivers/vc5/vc5_uniforms.c

index e935eef..64ccac8 100644 (file)
@@ -74,6 +74,11 @@ static inline uint32_t cl_offset(struct vc5_cl *cl)
         return (char *)cl->next - (char *)cl->base;
 }
 
+static inline struct vc5_cl_reloc cl_get_address(struct vc5_cl *cl)
+{ /* Builds a reloc from cl->bo and cl_offset(cl), i.e. cl->next relative to cl->base. */
+        return (struct vc5_cl_reloc){ .bo = cl->bo, .offset = cl_offset(cl) };
+}
+
 static inline void
 cl_advance(struct vc5_cl_out **cl, uint32_t n)
 {
index 287a35a..e55a297 100644 (file)
 #include "vc5_tiling.h"
 #include "broadcom/cle/v3d_packet_v33_pack.h"
 
+static void
+vc5_rcl_emit_generic_per_tile_list(struct vc5_job *job)
+{
+        /* Emit the generic list in our indirect state -- the rcl will just
+         * have pointers into it.
+         *
+         * The list written to job->indirect is, in order: an optional
+         * RELOAD_TILE_COLOUR_BUFFER for buffers that are in job->resolve but
+         * not in job->cleared, TILE_COORDINATES_IMPLICIT,
+         * BRANCH_TO_IMPLICIT_TILE_LIST, the extended store packet, and
+         * RETURN_FROM_SUB_LIST.  The list's start and end addresses are then
+         * emitted into job->rcl with START_ADDRESS_OF_GENERIC_TILE_LIST.
+         *
+         * NOTE(review): the 200 passed to vc5_cl_ensure_space() is not
+         * explained here -- confirm it covers every packet emitted below.
+         */
+        struct vc5_cl *cl = &job->indirect;
+        vc5_cl_ensure_space(cl, 200, 1);
+        struct vc5_cl_reloc tile_list_start = cl_get_address(cl);
+
+        const uint32_t pipe_clear_color_buffers = (PIPE_CLEAR_COLOR0 |
+                                                   PIPE_CLEAR_COLOR1 |
+                                                   PIPE_CLEAR_COLOR2 |
+                                                   PIPE_CLEAR_COLOR3);
+        const uint32_t first_color_buffer_bit = (ffs(PIPE_CLEAR_COLOR0) - 1);
+
+        uint32_t read_but_not_cleared = job->resolve & ~job->cleared;
+
+        /* The initial reload will be queued until we get the
+         * tile coordinates.
+         *
+         * disable_colour_buffer_load gets one bit set for every colour
+         * buffer that is NOT in read_but_not_cleared, shifted down so that
+         * PIPE_CLEAR_COLOR0 lands on bit 0.
+         */
+        if (read_but_not_cleared) {
+                cl_emit(cl, RELOAD_TILE_COLOUR_BUFFER, load) {
+                        load.disable_colour_buffer_load =
+                                (~read_but_not_cleared & pipe_clear_color_buffers) >>
+                                first_color_buffer_bit;
+                        load.enable_z_load =
+                                read_but_not_cleared & PIPE_CLEAR_DEPTH;
+                        load.enable_stencil_load =
+                                read_but_not_cleared & PIPE_CLEAR_STENCIL;
+                }
+        }
+
+        /* Tile Coordinates triggers the reload and sets where the stores
+         * go. There must be one per store packet.
+         */
+        cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
+
+        cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
+
+        cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
+                uint32_t color_write_enables =
+                        job->resolve >> first_color_buffer_bit;
+
+                store.disable_color_buffer_write = (~color_write_enables) & 0xf;
+                store.enable_z_write = job->resolve & PIPE_CLEAR_DEPTH;
+                store.enable_stencil_write = job->resolve & PIPE_CLEAR_STENCIL;
+
+                store.disable_colour_buffers_clear_on_write =
+                        (job->cleared & pipe_clear_color_buffers) == 0;
+                store.disable_z_buffer_clear_on_write =
+                        !(job->cleared & PIPE_CLEAR_DEPTH);
+                store.disable_stencil_buffer_clear_on_write =
+                        !(job->cleared & PIPE_CLEAR_STENCIL);
+        };
+
+        cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
+
+        cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
+                branch.start = tile_list_start;
+                branch.end = cl_get_address(cl);
+        }
+}
+
+/* Integer division of a by b, rounding the quotient up.  Every use of an
+ * argument is parenthesized so that expression operands (e.g. "n + 1") are
+ * divided as a whole.  Note that b is evaluated twice.
+ */
+#define div_round_up(a, b) (((a) + (b) - 1) / (b))
+
 void
 vc5_emit_rcl(struct vc5_job *job)
 {
-        uint32_t min_x_tile = job->draw_min_x / job->tile_width;
-        uint32_t min_y_tile = job->draw_min_y / job->tile_height;
-        uint32_t max_x_tile = (job->draw_max_x - 1) / job->tile_width;
-        uint32_t max_y_tile = (job->draw_max_y - 1) / job->tile_height;
-
         /* The RCL list should be empty. */
         assert(!job->rcl.bo);
 
-        vc5_cl_ensure_space(&job->rcl,
-                            256 +
-                            (64 *
-                             (max_x_tile - min_x_tile + 1) *
-                             (max_y_tile - min_y_tile + 1)), 1);
-
+        vc5_cl_ensure_space_with_branch(&job->rcl, 200 + 256 *
+                                        cl_packet_length(SUPERTILE_COORDINATES));
         job->submit.rcl_start = job->rcl.bo->offset;
         vc5_job_add_bo(job, job->rcl.bo);
 
@@ -137,7 +194,45 @@ vc5_emit_rcl(struct vc5_job *job)
                         TILE_ALLOCATION_BLOCK_SIZE_64B;
         }
 
-        cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem);
+        uint32_t supertile_w = 1, supertile_h = 1;
+
+        /* If doing multicore binning, we would need to initialize each core's
+         * tile list here.  As written, a single list base is set, pointing
+         * at offset 0 of job->tile_alloc.
+         */
+        cl_emit(&job->rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
+                list.address = cl_address(job->tile_alloc, 0);
+        }
+
+        cl_emit(&job->rcl, MULTICORE_RENDERING_SUPERTILE_CONFIGURATION, config) {
+                uint32_t frame_w_in_supertiles, frame_h_in_supertiles;
+                const uint32_t max_supertiles = 256;
+
+                /* Size up our supertiles until we get under the limit.
+                 * Each pass adds one tile to the width when it is smaller
+                 * than the height, otherwise to the height.
+                 */
+                for (;;) {
+                        frame_w_in_supertiles = div_round_up(job->draw_tiles_x,
+                                                             supertile_w);
+                        frame_h_in_supertiles = div_round_up(job->draw_tiles_y,
+                                                             supertile_h);
+                        if (frame_w_in_supertiles * frame_h_in_supertiles <
+                            max_supertiles) {
+                                break;
+                        }
+
+                        if (supertile_w < supertile_h)
+                                supertile_w++;
+                        else
+                                supertile_h++;
+                }
+
+                config.total_frame_width_in_tiles = job->draw_tiles_x;
+                config.total_frame_height_in_tiles = job->draw_tiles_y;
+
+                config.supertile_width_in_tiles_minus_1 = supertile_w - 1;
+                config.supertile_height_in_tiles_minus_1 = supertile_h - 1;
+
+                config.total_frame_width_in_supertiles = frame_w_in_supertiles;
+                config.total_frame_height_in_supertiles = frame_h_in_supertiles;
+        }
 
         /* Start by clearing the tile buffer. */
         cl_emit(&job->rcl, TILE_COORDINATES, coords) {
@@ -151,68 +246,26 @@ vc5_emit_rcl(struct vc5_job *job)
 
         cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
 
-        const uint32_t pipe_clear_color_buffers = (PIPE_CLEAR_COLOR0 |
-                                                   PIPE_CLEAR_COLOR1 |
-                                                   PIPE_CLEAR_COLOR2 |
-                                                   PIPE_CLEAR_COLOR3);
-        const uint32_t first_color_buffer_bit = (ffs(PIPE_CLEAR_COLOR0) - 1);
+        vc5_rcl_emit_generic_per_tile_list(job);
 
-        for (int y = min_y_tile; y <= max_y_tile; y++) {
-                for (int x = min_x_tile; x <= max_x_tile; x++) {
-                        uint32_t read_but_not_cleared = job->resolve & ~job->cleared;
-
-                        /* The initial reload will be queued until we get the
-                         * tile coordinates.
-                         */
-                        if (read_but_not_cleared) {
-                                cl_emit(&job->rcl, RELOAD_TILE_COLOUR_BUFFER, load) {
-                                        load.disable_colour_buffer_load =
-                                                (~read_but_not_cleared & pipe_clear_color_buffers) >>
-                                                first_color_buffer_bit;
-                                        load.enable_z_load =
-                                                read_but_not_cleared & PIPE_CLEAR_DEPTH;
-                                        load.enable_stencil_load =
-                                                read_but_not_cleared & PIPE_CLEAR_STENCIL;
-                                }
-                        }
-
-                        /* Tile Coordinates triggers the reload and sets where
-                         * the stores go. There must be one per store packet.
-                         */
-                        cl_emit(&job->rcl, TILE_COORDINATES, coords) {
-                                coords.tile_column_number = x;
-                                coords.tile_row_number = y;
-                        }
+        cl_emit(&job->rcl, WAIT_ON_SEMAPHORE, sem);
 
-                        cl_emit(&job->rcl, BRANCH_TO_AUTO_CHAINED_SUB_LIST, branch) {
-                                uint32_t bin_tile_stride =
-                                        (align(job->draw_width,
-                                               job->tile_width) /
-                                         job->tile_width);
-                                uint32_t bin_index =
-                                        (y * bin_tile_stride + x);
-                                branch.address = cl_address(job->tile_alloc,
-                                                            64 * bin_index);
+        /* XXX: Use Morton order
+         *
+         * Until then, the supertiles in the range computed from the job's
+         * draw_min/draw_max bounds are emitted row by row: y in the outer
+         * loop, x in the inner one, one SUPERTILE_COORDINATES packet each.
+         */
+        uint32_t supertile_w_in_pixels = job->tile_width * supertile_w;
+        uint32_t supertile_h_in_pixels = job->tile_height * supertile_h;
+        uint32_t min_x_supertile = job->draw_min_x / supertile_w_in_pixels;
+        uint32_t min_y_supertile = job->draw_min_y / supertile_h_in_pixels;
+        uint32_t max_x_supertile = (job->draw_max_x - 1) / supertile_w_in_pixels;
+        uint32_t max_y_supertile = (job->draw_max_y - 1) / supertile_h_in_pixels;
+
+        for (int y = min_y_supertile; y <= max_y_supertile; y++) {
+                for (int x = min_x_supertile; x <= max_x_supertile; x++) {
+                        cl_emit(&job->rcl, SUPERTILE_COORDINATES, coords) {
+                                coords.column_number_in_supertiles = x;
+                                coords.row_number_in_supertiles = y;
                         }
-
-                        cl_emit(&job->rcl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
-                                uint32_t color_write_enables =
-                                        job->resolve >> first_color_buffer_bit;
-
-                                store.disable_color_buffer_write = (~color_write_enables) & 0xf;
-                                store.enable_z_write = job->resolve & PIPE_CLEAR_DEPTH;
-                                store.enable_stencil_write = job->resolve & PIPE_CLEAR_STENCIL;
-
-                                store.disable_colour_buffers_clear_on_write =
-                                        (job->cleared & pipe_clear_color_buffers) == 0;
-                                store.disable_z_buffer_clear_on_write =
-                                        !(job->cleared & PIPE_CLEAR_DEPTH);
-                                store.disable_stencil_buffer_clear_on_write =
-                                        !(job->cleared & PIPE_CLEAR_STENCIL);
-
-                                store.last_tile_of_frame = (x == max_x_tile &&
-                                                            y == max_y_tile);
-                        };
                 }
         }
+
+        cl_emit(&job->rcl, END_OF_RENDERING, end);
 }
index dc444fe..0c8bee5 100644 (file)
@@ -225,8 +225,7 @@ vc5_write_uniforms(struct vc5_context *vc5, struct vc5_compiled_shader *shader,
          */
         vc5_cl_ensure_space(&job->indirect, MAX2(uinfo->count, 1) * 4, 4);
 
-        struct vc5_cl_reloc uniform_stream =
-                cl_address(job->indirect.bo, cl_offset(&job->indirect));
+        struct vc5_cl_reloc uniform_stream = cl_get_address(&job->indirect);
         vc5_bo_reference(uniform_stream.bo);
 
         struct vc5_cl_out *uniforms =