panfrost: Remove experimental v7-only indirect draw path
author Alyssa Rosenzweig <alyssa@collabora.com>
Wed, 14 Dec 2022 23:37:27 +0000 (18:37 -0500)
committer Marge Bot <emma+marge@anholt.net>
Fri, 23 Dec 2022 16:27:16 +0000 (16:27 +0000)
There are so many problems with indirect draws on v7 that we never got this
code path to the finish line, and none of us have a good plan (or reason) to fix
this. Proper indirect draws have only been possible since v10 on Mali.

There was interest in using this path to implement indexed draws in PanVK, but
that MR is stalled and it's not clear how much sense it makes to do Vulkan on
anything older than v9 or v10 at this point. This code isn't *gone*, it'll still
be in git history, but I don't see a lot of reason in keeping it in tree if it's
unused and complicating e.g. the sysval upload path of the driver.

Indirect dispatch remains supported on v7, as that path *is* working and flipped
on for end users. Indirect dispatch on v7 is considerably less complicated than
indirect draws.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20420>

src/gallium/drivers/panfrost/pan_cmdstream.c
src/gallium/drivers/panfrost/pan_context.h
src/gallium/drivers/panfrost/pan_job.h
src/gallium/drivers/panfrost/pan_screen.c
src/gallium/drivers/panfrost/pan_screen.h
src/panfrost/ci/panfrost-g52-skips.txt
src/panfrost/lib/meson.build
src/panfrost/lib/pan_device.h
src/panfrost/lib/pan_indirect_draw.c [deleted file]
src/panfrost/lib/pan_indirect_draw.h [deleted file]

index 3d17d08c7579dcc34c2e31496e348b03f65ac261..02df91cec6e753354de69f40907e7646f79f868d 100644 (file)
@@ -44,7 +44,6 @@
 #include "pan_shader.h"
 #include "pan_texture.h"
 #include "pan_util.h"
-#include "pan_indirect_draw.h"
 #include "pan_indirect_dispatch.h"
 #include "pan_blitter.h"
 
@@ -1416,13 +1415,6 @@ panfrost_upload_sysvals(struct panfrost_batch *batch,
                         break;
 #endif
                 case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
-                        batch->ctx->first_vertex_sysval_ptr =
-                                ptr->gpu + (i * sizeof(*uniforms));
-                        batch->ctx->base_vertex_sysval_ptr =
-                                batch->ctx->first_vertex_sysval_ptr + 4;
-                        batch->ctx->base_instance_sysval_ptr =
-                                batch->ctx->first_vertex_sysval_ptr + 8;
-
                         uniforms[i].u[0] = batch->ctx->offset_start;
                         uniforms[i].u[1] = batch->ctx->base_vertex;
                         uniforms[i].u[2] = batch->ctx->base_instance;
@@ -1568,33 +1560,8 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
                         unsigned sysval_type = PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[sysval_idx]);
                         mali_ptr ptr = push_transfer.gpu + (4 * i);
 
-                        switch (sysval_type) {
-                        case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
-                                switch (sysval_comp) {
-                                case 0:
-                                        batch->ctx->first_vertex_sysval_ptr = ptr;
-                                        break;
-                                case 1:
-                                        batch->ctx->base_vertex_sysval_ptr = ptr;
-                                        break;
-                                case 2:
-                                        batch->ctx->base_instance_sysval_ptr = ptr;
-                                        break;
-                                case 3:
-                                        /* Spurious (Midgard doesn't pack) */
-                                        break;
-                                default:
-                                        unreachable("Invalid vertex/instance offset component\n");
-                                }
-                                break;
-
-                        case PAN_SYSVAL_NUM_WORK_GROUPS:
+                        if (sysval_type == PAN_SYSVAL_NUM_WORK_GROUPS)
                                 batch->num_wg_sysval[sysval_comp] = ptr;
-                                break;
-
-                        default:
-                                break;
-                        }
                 }
                 /* Map the UBO, this should be cheap. However this is reading
                  * from write-combine memory which is _very_ slow. It might pay
@@ -2025,7 +1992,7 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
         struct panfrost_context *ctx = batch->ctx;
         struct panfrost_vertex_state *so = ctx->vertex;
         struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX];
-        bool instanced = ctx->indirect_draw || ctx->instance_count > 1;
+        bool instanced = ctx->instance_count > 1;
         uint32_t image_mask = ctx->image_mask[PIPE_SHADER_VERTEX];
         unsigned nr_images = util_last_bit(image_mask);
 
@@ -2103,33 +2070,6 @@ panfrost_emit_vertex_data(struct panfrost_batch *batch,
                 /* When there is a divisor, the hardware-level divisor is
                  * the product of the instance divisor and the padded count */
                 unsigned stride = buf->stride;
-
-                if (ctx->indirect_draw) {
-                        /* We allocated 2 records for each attribute buffer */
-                        assert((k & 1) == 0);
-
-                        /* With indirect draws we can't guess the vertex_count.
-                         * Pre-set the address, stride and size fields, the
-                         * compute shader do the rest.
-                         */
-                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
-                                cfg.type = MALI_ATTRIBUTE_TYPE_1D;
-                                cfg.pointer = addr;
-                                cfg.stride = stride;
-                                cfg.size = size;
-                        }
-
-                        /* We store the unmodified divisor in the continuation
-                         * slot so the compute shader can retrieve it.
-                         */
-                        pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
-                                cfg.divisor = divisor;
-                        }
-
-                        k += 2;
-                        continue;
-                }
-
                 unsigned hw_divisor = ctx->padded_count * divisor;
 
                 if (ctx->instance_count <= 1) {
@@ -2273,7 +2213,6 @@ panfrost_emit_varyings(struct panfrost_batch *batch,
 {
         unsigned size = stride * count;
         mali_ptr ptr =
-                batch->ctx->indirect_draw ? 0 :
                 pan_pool_alloc_aligned(&batch->invisible_pool.base, size, 64).gpu;
 
         pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
@@ -2677,13 +2616,9 @@ panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
                                 const struct panfrost_ptr *vertex_job,
                                 const struct panfrost_ptr *tiler_job)
 {
-        struct panfrost_context *ctx = batch->ctx;
-
         unsigned vertex = panfrost_add_job(&batch->pool.base, &batch->scoreboard,
                                            MALI_JOB_TYPE_VERTEX, false, false,
-                                           ctx->indirect_draw ?
-                                           batch->indirect_draw_job_id : 0,
-                                           0, vertex_job, false);
+                                           0, 0, vertex_job, false);
 
         panfrost_add_job(&batch->pool.base, &batch->scoreboard,
                          MALI_JOB_TYPE_TILER, false, false,
@@ -3136,10 +3071,9 @@ panfrost_emit_primitive(struct panfrost_context *ctx,
                 assert(!cfg.primitive_restart || panfrost_is_implicit_prim_restart(info));
 #endif
 
-                cfg.index_count = ctx->indirect_draw ? 1 : draw->count;
+                cfg.index_count = draw->count;
                 cfg.index_type = panfrost_translate_index_size(info->index_size);
 
-
                 if (PAN_ARCH >= 9) {
                         /* Base vertex offset on Valhall is used for both
                          * indexed and non-indexed draws, in a simple way for
@@ -3653,7 +3587,6 @@ panfrost_direct_draw(struct panfrost_batch *batch,
         }
 
         /* Take into account a negative bias */
-        ctx->indirect_draw = false;
         ctx->vertex_count = draw->count + (info->index_size ? abs(draw->index_bias) : 0);
         ctx->instance_count = info->instance_count;
         ctx->base_vertex = info->index_size ? draw->index_bias : 0;
@@ -3803,172 +3736,6 @@ panfrost_direct_draw(struct panfrost_batch *batch,
 #endif
 }
 
-#if PAN_GPU_INDIRECTS
-static void
-panfrost_indirect_draw(struct panfrost_batch *batch,
-                       const struct pipe_draw_info *info,
-                       unsigned drawid_offset,
-                       const struct pipe_draw_indirect_info *indirect,
-                       const struct pipe_draw_start_count_bias *draw)
-{
-        /* Indirect draw count and multi-draw not supported. */
-        assert(indirect->draw_count == 1 && !indirect->indirect_draw_count);
-
-        struct panfrost_context *ctx = batch->ctx;
-        struct panfrost_device *dev = pan_device(ctx->base.screen);
-
-        perf_debug(dev, "Emulating indirect draw on the GPU");
-
-        /* TODO: update statistics (see panfrost_statistics_record()) */
-        /* TODO: Increment transform feedback offsets */
-        assert(ctx->streamout.num_targets == 0);
-
-        ctx->active_prim = info->mode;
-        ctx->drawid = drawid_offset;
-        ctx->indirect_draw = true;
-
-        struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX];
-
-        bool idvs = vs->info.vs.idvs;
-        bool secondary_shader = vs->info.vs.secondary_enable;
-
-        struct panfrost_ptr tiler = { 0 }, vertex = { 0 };
-
-        if (idvs) {
-#if PAN_ARCH >= 6
-                tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB);
-#else
-                unreachable("IDVS is unsupported on Midgard");
-#endif
-        } else {
-                vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
-                tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB);
-        }
-
-        struct panfrost_bo *index_buf = NULL;
-
-        if (info->index_size) {
-                assert(!info->has_user_indices);
-                struct panfrost_resource *rsrc = pan_resource(info->index.resource);
-                index_buf = rsrc->image.data.bo;
-                panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
-        }
-
-        mali_ptr varyings = 0, vs_vary = 0, fs_vary = 0, pos = 0, psiz = 0;
-        unsigned varying_buf_count;
-
-        /* We want to create templates, set all count fields to 0 to reflect
-         * that.
-         */
-        ctx->instance_count = ctx->vertex_count = ctx->padded_count = 0;
-        ctx->offset_start = 0;
-
-        /* Set the {first,base}_vertex sysvals to NULL. Will be updated if the
-         * vertex shader uses gl_VertexID or gl_BaseVertex.
-         */
-        ctx->first_vertex_sysval_ptr = 0;
-        ctx->base_vertex_sysval_ptr = 0;
-        ctx->base_instance_sysval_ptr = 0;
-
-        panfrost_update_state_3d(batch);
-        panfrost_update_shader_state(batch, PIPE_SHADER_VERTEX);
-        panfrost_update_shader_state(batch, PIPE_SHADER_FRAGMENT);
-        panfrost_clean_state_3d(ctx);
-
-        bool point_coord_replace = (info->mode == PIPE_PRIM_POINTS);
-
-        panfrost_emit_varying_descriptor(batch, 0,
-                                         &vs_vary, &fs_vary, &varyings,
-                                         &varying_buf_count, &pos, &psiz,
-                                         point_coord_replace);
-
-        mali_ptr attribs, attrib_bufs;
-        attribs = panfrost_emit_vertex_data(batch, &attrib_bufs);
-
-        /* Zero-ed invocation, the compute job will update it. */
-        static struct mali_invocation_packed invocation;
-
-        /* Fire off the draw itself */
-        panfrost_draw_emit_tiler(batch, info, draw, &invocation,
-                                 index_buf ? index_buf->ptr.gpu : 0,
-                                 fs_vary, varyings, pos, psiz, secondary_shader,
-                                 tiler.cpu);
-        if (idvs) {
-#if PAN_ARCH >= 6
-                panfrost_draw_emit_vertex_section(batch,
-                                  vs_vary, varyings,
-                                  attribs, attrib_bufs,
-                                  pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW));
-#endif
-        } else {
-                panfrost_draw_emit_vertex(batch, info, &invocation,
-                                          vs_vary, varyings, attribs, attrib_bufs, vertex.cpu);
-        }
-
-        /* Add the varying heap BO to the batch if we're allocating varyings. */
-        if (varyings) {
-                panfrost_batch_add_bo(batch,
-                                      dev->indirect_draw_shaders.varying_heap,
-                                      PIPE_SHADER_VERTEX);
-        }
-
-        assert(indirect->buffer);
-
-        struct panfrost_resource *draw_buf = pan_resource(indirect->buffer);
-
-        /* Don't count images: those attributes don't need to be patched. */
-        unsigned attrib_count =
-                vs->info.attribute_count -
-                util_bitcount(ctx->image_mask[PIPE_SHADER_VERTEX]);
-
-        panfrost_batch_read_rsrc(batch, draw_buf, PIPE_SHADER_VERTEX);
-
-        struct pan_indirect_draw_info draw_info = {
-                .last_indirect_draw = batch->indirect_draw_job_id,
-                .draw_buf = draw_buf->image.data.bo->ptr.gpu + indirect->offset,
-                .index_buf = index_buf ? index_buf->ptr.gpu : 0,
-                .first_vertex_sysval = ctx->first_vertex_sysval_ptr,
-                .base_vertex_sysval = ctx->base_vertex_sysval_ptr,
-                .base_instance_sysval = ctx->base_instance_sysval_ptr,
-                .vertex_job = vertex.gpu,
-                .tiler_job = tiler.gpu,
-                .attrib_bufs = attrib_bufs,
-                .attribs = attribs,
-                .attrib_count = attrib_count,
-                .varying_bufs = varyings,
-                .index_size = info->index_size,
-        };
-
-        if (panfrost_writes_point_size(ctx))
-                draw_info.flags |= PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE;
-
-        if (vs->info.vs.writes_point_size)
-                draw_info.flags |= PAN_INDIRECT_DRAW_HAS_PSIZ;
-
-        if (idvs)
-                draw_info.flags |= PAN_INDIRECT_DRAW_IDVS;
-
-        if (info->primitive_restart) {
-                draw_info.restart_index = info->restart_index;
-                draw_info.flags |= PAN_INDIRECT_DRAW_PRIMITIVE_RESTART;
-        }
-
-        batch->indirect_draw_job_id =
-                GENX(panfrost_emit_indirect_draw)(&batch->pool.base,
-                                                  &batch->scoreboard,
-                                                  &draw_info,
-                                                  &batch->indirect_draw_ctx);
-
-        if (idvs) {
-                panfrost_add_job(&batch->pool.base, &batch->scoreboard,
-                                 MALI_JOB_TYPE_INDEXED_VERTEX, false, false,
-                                 0, 0, &tiler, false);
-        } else {
-                panfrost_emit_vertex_tiler_jobs(batch, &vertex, &tiler);
-        }
-}
-#endif
-
 static bool
 panfrost_compatible_batch_state(struct panfrost_batch *batch,
                                 bool points)
@@ -4008,8 +3775,8 @@ panfrost_draw_vbo(struct pipe_context *pipe,
 
         ctx->draw_calls++;
 
-        /* Emulate indirect draws unless we're using the experimental path */
-        if ((!(dev->debug & PAN_DBG_INDIRECT) || !PAN_GPU_INDIRECTS) && indirect && indirect->buffer) {
+        /* Emulate indirect draws on JM */
+        if (indirect && indirect->buffer) {
                 assert(num_draws == 1);
                 util_draw_indirect(pipe, info, indirect);
                 perf_debug(dev, "Emulating indirect draw on the CPU");
@@ -4048,28 +3815,6 @@ panfrost_draw_vbo(struct pipe_context *pipe,
         /* Conservatively assume draw parameters always change */
         ctx->dirty |= PAN_DIRTY_PARAMS | PAN_DIRTY_DRAWID;
 
-        if (indirect) {
-                assert(num_draws == 1);
-                assert(PAN_GPU_INDIRECTS);
-
-#if PAN_GPU_INDIRECTS
-                if (indirect->count_from_stream_output) {
-                        struct pipe_draw_start_count_bias tmp_draw = *draws;
-                        struct panfrost_streamout_target *so =
-                                pan_so_target(indirect->count_from_stream_output);
-
-                        tmp_draw.start = 0;
-                        tmp_draw.count = so->offset;
-                        tmp_draw.index_bias = 0;
-                        panfrost_direct_draw(batch, info, drawid_offset, &tmp_draw);
-                        return;
-                }
-
-                panfrost_indirect_draw(batch, info, drawid_offset, indirect, &draws[0]);
-                return;
-#endif
-        }
-
         struct pipe_draw_info tmp_info = *info;
         unsigned drawid = drawid_offset;
 
@@ -4692,9 +4437,7 @@ screen_destroy(struct pipe_screen *pscreen)
 {
         struct panfrost_device *dev = pan_device(pscreen);
         GENX(pan_blitter_cleanup)(dev);
-
 #if PAN_GPU_INDIRECTS
-        GENX(panfrost_cleanup_indirect_draw_shaders)(dev);
         GENX(pan_indirect_dispatch_cleanup)(dev);
 #endif
 }
@@ -4845,7 +4588,4 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
 
         GENX(pan_blitter_init)(dev, &screen->blitter.bin_pool.base,
                                &screen->blitter.desc_pool.base);
-#if PAN_GPU_INDIRECTS
-        GENX(panfrost_init_indirect_draw_shaders)(dev, &screen->indirect_draw.bin_pool.base);
-#endif
 }
index 2bcaf3533e70fd74071d686c333199bb893fa242..8810009b8b9bc76be91c2a673acd5c9864835e78 100644 (file)
@@ -162,16 +162,12 @@ struct panfrost_context {
         uint64_t draw_calls;
         struct panfrost_query *occlusion_query;
 
-        bool indirect_draw;
         unsigned drawid;
         unsigned vertex_count;
         unsigned instance_count;
         unsigned offset_start;
         unsigned base_vertex;
         unsigned base_instance;
-        mali_ptr first_vertex_sysval_ptr;
-        mali_ptr base_vertex_sysval_ptr;
-        mali_ptr base_instance_sysval_ptr;
         enum pipe_prim_type active_prim;
 
         /* If instancing is enabled, vertex count padded for instance; if
index 23263c54e0716cac7390ed61dbf05fd4327eb6b1..727427b1a17145af7a39e7bbde3a3321b3a3f7ca 100644 (file)
@@ -158,10 +158,6 @@ struct panfrost_batch {
         /* Tiler context */
         struct pan_tiler_context tiler_ctx;
 
-        /* Indirect draw data */
-        struct panfrost_ptr indirect_draw_ctx;
-        unsigned indirect_draw_job_id;
-
         /* Keep the num_work_groups sysval around for indirect dispatch */
         mali_ptr num_wg_sysval[3];
 
index 5c8072fc3e0981b327a984408241b210e2b65370..33b83840058393dcb62050410d04c56835a0bc81 100644 (file)
@@ -765,7 +765,6 @@ panfrost_destroy_screen(struct pipe_screen *pscreen)
         struct panfrost_screen *screen = pan_screen(pscreen);
 
         panfrost_resource_screen_destroy(pscreen);
-        panfrost_pool_cleanup(&screen->indirect_draw.bin_pool);
         panfrost_pool_cleanup(&screen->blitter.bin_pool);
         panfrost_pool_cleanup(&screen->blitter.desc_pool);
         pan_blend_shaders_cleanup(dev);
@@ -868,9 +867,6 @@ panfrost_create_screen(int fd, struct renderonly *ro)
 
         panfrost_disk_cache_init(screen);
 
-        panfrost_pool_init(&screen->indirect_draw.bin_pool, NULL, dev,
-                           PAN_BO_EXECUTE, 65536, "Indirect draw shaders",
-                           false, true);
         panfrost_pool_init(&screen->blitter.bin_pool, NULL, dev, PAN_BO_EXECUTE,
                            4096, "Blitter shaders", false, true);
         panfrost_pool_init(&screen->blitter.desc_pool, NULL, dev, 0, 65536,
index 656a4948a422d23f95d05278412984b7776ac55c..f3f7df4189257dd9ddf96438c5bdaea9883e4cea 100644 (file)
@@ -109,9 +109,6 @@ struct panfrost_screen {
                 struct panfrost_pool bin_pool;
                 struct panfrost_pool desc_pool;
         } blitter;
-        struct {
-                struct panfrost_pool bin_pool;
-        } indirect_draw;
 
         struct panfrost_vtable vtbl;
         struct disk_cache *disk_cache;
index c08d5dd87e20ce51a33fd5273c18d11c0a5d2299..8f627a48b6e227e8ccb9d105b5d51ac631d90873 100644 (file)
@@ -6,14 +6,6 @@
 # kernel driver
 dEQP-GLES31.functional.draw_indirect.compute_interop.large.*
 
-# We lack a dependency between the vertex job filling the indirect draw
-# buffers and the indirect draw compute job reading from these buffers,
-# leading to unreliable results (the tests pass if the vertex job is
-# done before the compute job starts, and fail otherwise). Let's disable
-# those tests until we sort it out.
-KHR-GLES31.core.draw_indirect.advanced-twoPass-transformFeedback-arrays
-KHR-GLES31.core.draw_indirect.advanced-twoPass-transformFeedback-elements
-
 # fail seen here and in others
 # https://gitlab.freedesktop.org/mesa/mesa/-/jobs/19776551
 dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.36
index f8c34cb5a825f35badf64cee4e2a0677e7a5f37a..ab83345012c6406e78db95be838855570ad482e2 100644 (file)
@@ -59,10 +59,7 @@ endforeach
 foreach ver : ['7']
   libpanfrost_per_arch += static_library(
     'pan-arch-indirect-v' + ver,
-    [
-      'pan_indirect_dispatch.c',
-      'pan_indirect_draw.c',
-    ],
+    'pan_indirect_dispatch.c',
     include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_panfrost_hw],
     c_args : ['-DPAN_ARCH=' + ver],
     gnu_symbol_visibility : 'hidden',
index ad18d154a2cf29fcb66968ec0e9d7f85001505a0..52c029bfe9fa44dc3034b115c153fb106a3e9ccb 100644 (file)
@@ -79,59 +79,6 @@ struct pan_blend_shaders {
         pthread_mutex_t lock;
 };
 
-enum pan_indirect_draw_flags {
-        PAN_INDIRECT_DRAW_NO_INDEX = 0 << 0,
-        PAN_INDIRECT_DRAW_1B_INDEX = 1 << 0,
-        PAN_INDIRECT_DRAW_2B_INDEX = 2 << 0,
-        PAN_INDIRECT_DRAW_4B_INDEX = 3 << 0,
-        PAN_INDIRECT_DRAW_INDEX_SIZE_MASK = 3 << 0,
-        PAN_INDIRECT_DRAW_HAS_PSIZ = 1 << 2,
-        PAN_INDIRECT_DRAW_PRIMITIVE_RESTART = 1 << 3,
-        PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE = 1 << 4,
-        PAN_INDIRECT_DRAW_IDVS = 1 << 5,
-        PAN_INDIRECT_DRAW_LAST_FLAG = PAN_INDIRECT_DRAW_IDVS,
-        PAN_INDIRECT_DRAW_FLAGS_MASK = (PAN_INDIRECT_DRAW_LAST_FLAG << 1) - 1,
-        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX = PAN_INDIRECT_DRAW_LAST_FLAG << 1,
-        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_2B_INDEX,
-        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_4B_INDEX,
-        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX_PRIM_RESTART,
-        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_2B_INDEX_PRIM_RESTART,
-        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_3B_INDEX_PRIM_RESTART,
-        PAN_INDIRECT_DRAW_NUM_SHADERS,
-};
-
-struct pan_indirect_draw_shader {
-        struct panfrost_ubo_push push;
-        mali_ptr rsd;
-};
-
-struct pan_indirect_draw_shaders {
-        struct pan_indirect_draw_shader shaders[PAN_INDIRECT_DRAW_NUM_SHADERS];
-
-        /* Take the lock when initializing the draw shaders context or when
-         * allocating from the binary pool.
-         */
-        pthread_mutex_t lock;
-
-        /* A memory pool for shader binaries. We currently don't allocate a
-         * single BO for all shaders up-front because estimating shader size
-         * is not trivial, and changes to the compiler might influence this
-         * estimation.
-         */
-        struct pan_pool *bin_pool;
-
-        /* BO containing all renderer states attached to the compute shaders.
-         * Those are built at shader compilation time and re-used every time
-         * panfrost_emit_indirect_draw() is called.
-         */
-        struct panfrost_bo *states;
-
-        /* Varying memory is allocated dynamically by compute jobs from this
-         * heap.
-         */
-        struct panfrost_bo *varying_heap;
-};
-
 struct pan_indirect_dispatch {
         struct panfrost_ubo_push push;
         struct panfrost_bo *bin;
@@ -239,7 +186,6 @@ struct panfrost_device {
 
         struct pan_blitter blitter;
         struct pan_blend_shaders blend_shaders;
-        struct pan_indirect_draw_shaders indirect_draw_shaders;
         struct pan_indirect_dispatch indirect_dispatch;
 
         /* Tiler heap shared across all tiler jobs, allocated against the
diff --git a/src/panfrost/lib/pan_indirect_draw.c b/src/panfrost/lib/pan_indirect_draw.c
deleted file mode 100644 (file)
index 3fa1f54..0000000
+++ /dev/null
@@ -1,1374 +0,0 @@
-/*
- * Copyright (C) 2021 Collabora, Ltd.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <stdio.h>
-#include "pan_bo.h"
-#include "pan_shader.h"
-#include "pan_scoreboard.h"
-#include "pan_encoder.h"
-#include "pan_indirect_draw.h"
-#include "pan_pool.h"
-#include "pan_util.h"
-#include "compiler/nir/nir_builder.h"
-#include "util/u_memory.h"
-#include "util/macros.h"
-
-#define WORD(x) ((x) * 4)
-
-#define LOOP \
-        for (nir_loop *l = nir_push_loop(b); l != NULL; \
-             nir_pop_loop(b, l), l = NULL)
-#define BREAK nir_jump(b, nir_jump_break)
-#define CONTINUE nir_jump(b, nir_jump_continue)
-
-#define IF(cond) nir_push_if(b, cond);
-#define ELSE nir_push_else(b, NULL);
-#define ENDIF nir_pop_if(b, NULL);
-
-#define MIN_MAX_JOBS 128
-
-struct draw_data {
-        nir_ssa_def *draw_buf;
-        nir_ssa_def *draw_buf_stride;
-        nir_ssa_def *index_buf;
-        nir_ssa_def *restart_index;
-        nir_ssa_def *vertex_count;
-        nir_ssa_def *start_instance;
-        nir_ssa_def *instance_count;
-        nir_ssa_def *vertex_start;
-        nir_ssa_def *index_bias;
-        nir_ssa_def *draw_ctx;
-        nir_ssa_def *min_max_ctx;
-};
-
-struct instance_size {
-        nir_ssa_def *raw;
-        nir_ssa_def *padded;
-        nir_ssa_def *packed;
-};
-
-struct jobs_data {
-        nir_ssa_def *vertex_job;
-        nir_ssa_def *tiler_job;
-        nir_ssa_def *base_vertex_offset;
-        nir_ssa_def *first_vertex_sysval;
-        nir_ssa_def *base_vertex_sysval;
-        nir_ssa_def *base_instance_sysval;
-        nir_ssa_def *offset_start;
-        nir_ssa_def *invocation;
-};
-
-struct varyings_data {
-        nir_ssa_def *varying_bufs;
-        nir_ssa_def *pos_ptr;
-        nir_ssa_def *psiz_ptr;
-        nir_variable *mem_ptr;
-};
-
-struct attribs_data {
-        nir_ssa_def *attrib_count;
-        nir_ssa_def *attrib_bufs;
-        nir_ssa_def *attribs;
-};
-
-struct indirect_draw_shader_builder {
-        nir_builder b;
-        const struct panfrost_device *dev;
-        unsigned flags;
-        bool index_min_max_search;
-        unsigned index_size;
-        struct draw_data draw;
-        struct instance_size instance_size;
-        struct jobs_data jobs;
-        struct varyings_data varyings;
-        struct attribs_data attribs;
-};
-
-/* Describes an indirect draw (see glDrawArraysIndirect()) */
-
-struct indirect_draw_info {
-        uint32_t count;
-        uint32_t instance_count;
-        uint32_t start;
-        uint32_t start_instance;
-};
-
-struct indirect_indexed_draw_info {
-        uint32_t count;
-        uint32_t instance_count;
-        uint32_t start;
-        int32_t index_bias;
-        uint32_t start_instance;
-};
-
-/* Store the min/max index in a separate context. This is not supported yet, but
- * the DDK seems to put all min/max search jobs at the beginning of the job chain
- * when multiple indirect draws are issued to avoid the serialization caused by
- * the draw patching jobs which have the suppress_prefetch flag set. Merging the
- * min/max and draw contexts would prevent such optimizations (draw contexts are
- * shared by all indirect draw in a batch).
- */
-
-struct min_max_context {
-        uint32_t min;
-        uint32_t max;
-};
-
-/* Per-batch context shared by all indirect draws queued to a given batch. */
-
-struct indirect_draw_context {
-        /* Pointer to the top of the varying heap. */
-        mali_ptr varying_mem;
-};
-
-/* Indirect draw shader inputs. Those are stored in FAU. */
-
-struct indirect_draw_inputs {
-        /* indirect_draw_context pointer */
-        mali_ptr draw_ctx;
-
-        /* min_max_context pointer */
-        mali_ptr min_max_ctx;
-
-        /* Pointer to an array of indirect_draw_info objects */
-        mali_ptr draw_buf;
-
-        /* Pointer to an uint32_t containing the number of draws to issue */
-        mali_ptr draw_count_ptr;
-
-        /* index buffer */
-        mali_ptr index_buf;
-
-        /* {base,first}_{vertex,instance} sysvals */
-        mali_ptr first_vertex_sysval;
-        mali_ptr base_vertex_sysval;
-        mali_ptr base_instance_sysval;
-
-        /* Pointers to various cmdstream structs that need to be patched */
-        mali_ptr vertex_job;
-        mali_ptr tiler_job;
-        mali_ptr attrib_bufs;
-        mali_ptr attribs;
-        mali_ptr varying_bufs;
-        uint32_t draw_count;
-        uint32_t draw_buf_stride;
-        uint32_t restart_index;
-        uint32_t attrib_count;
-} PACKED;
-
-#define get_input_field(b, name) \
-        nir_load_push_constant(b, \
-               1, sizeof(((struct indirect_draw_inputs *)0)->name) * 8, \
-               nir_imm_int(b, 0), \
-               .base = offsetof(struct indirect_draw_inputs, name))
-
-static nir_ssa_def *
-get_address(nir_builder *b, nir_ssa_def *base, nir_ssa_def *offset)
-{
-        return nir_iadd(b, base, nir_u2u64(b, offset));
-}
-
-static nir_ssa_def *
-get_address_imm(nir_builder *b, nir_ssa_def *base, unsigned offset)
-{
-        return get_address(b, base, nir_imm_int(b, offset));
-}
-
-static nir_ssa_def *
-load_global(nir_builder *b, nir_ssa_def *addr, unsigned ncomps, unsigned bit_size)
-{
-        return nir_load_global(b, addr, 4, ncomps, bit_size);
-}
-
-static void
-store_global(nir_builder *b, nir_ssa_def *addr,
-             nir_ssa_def *value, unsigned ncomps)
-{
-        nir_store_global(b, addr, 4, value, (1 << ncomps) - 1);
-}
-
-static nir_ssa_def *
-get_draw_ctx_data(struct indirect_draw_shader_builder *builder,
-                  unsigned offset, unsigned size)
-{
-        nir_builder *b = &builder->b;
-        return load_global(b,
-                           get_address_imm(b, builder->draw.draw_ctx, offset),
-                           1, size);
-}
-
-static void
-set_draw_ctx_data(struct indirect_draw_shader_builder *builder,
-                  unsigned offset, nir_ssa_def *value, unsigned size)
-{
-        nir_builder *b = &builder->b;
-        store_global(b,
-                     get_address_imm(b, builder->draw.draw_ctx, offset),
-                     value, 1);
-}
-
-#define get_draw_ctx_field(builder, name) \
-        get_draw_ctx_data(builder, \
-                          offsetof(struct indirect_draw_context, name), \
-                          sizeof(((struct indirect_draw_context *)0)->name) * 8)
-
-#define set_draw_ctx_field(builder, name, val) \
-        set_draw_ctx_data(builder, \
-                          offsetof(struct indirect_draw_context, name), \
-                          val, \
-                          sizeof(((struct indirect_draw_context *)0)->name) * 8)
-
-static nir_ssa_def *
-get_min_max_ctx_data(struct indirect_draw_shader_builder *builder,
-                     unsigned offset, unsigned size)
-{
-        nir_builder *b = &builder->b;
-        return load_global(b,
-                           get_address_imm(b, builder->draw.min_max_ctx, offset),
-                           1, size);
-}
-
-#define get_min_max_ctx_field(builder, name) \
-        get_min_max_ctx_data(builder, \
-                             offsetof(struct min_max_context, name), \
-                             sizeof(((struct min_max_context *)0)->name) * 8)
-
-static void
-update_min(struct indirect_draw_shader_builder *builder, nir_ssa_def *val)
-{
-        nir_builder *b = &builder->b;
-        nir_ssa_def *addr =
-                get_address_imm(b,
-                                builder->draw.min_max_ctx,
-                                offsetof(struct min_max_context, min));
-        nir_global_atomic_umin(b, 32, addr, val);
-}
-
-static void
-update_max(struct indirect_draw_shader_builder *builder, nir_ssa_def *val)
-{
-        nir_builder *b = &builder->b;
-        nir_ssa_def *addr =
-                get_address_imm(b,
-                                builder->draw.min_max_ctx,
-                                offsetof(struct min_max_context, max));
-        nir_global_atomic_umax(b, 32, addr, val);
-}
-
-#define get_draw_field(b, draw_ptr, field) \
-        load_global(b, \
-                    get_address_imm(b, draw_ptr, \
-                                    offsetof(struct indirect_draw_info, field)), \
-                    1, sizeof(((struct indirect_draw_info *)0)->field) * 8)
-
-#define get_indexed_draw_field(b, draw_ptr, field) \
-        load_global(b, \
-                    get_address_imm(b, draw_ptr, \
-                                    offsetof(struct indirect_indexed_draw_info, field)), \
-                    1, sizeof(((struct indirect_indexed_draw_info *)0)->field) * 8)
-
-static void
-extract_inputs(struct indirect_draw_shader_builder *builder)
-{
-        nir_builder *b = &builder->b;
-
-        builder->draw.draw_ctx = get_input_field(b, draw_ctx);
-        builder->draw.draw_buf = get_input_field(b, draw_buf);
-        builder->draw.draw_buf_stride = get_input_field(b, draw_buf_stride);
-
-        if (builder->index_size) {
-                builder->draw.index_buf = get_input_field(b, index_buf);
-                builder->draw.min_max_ctx = get_input_field(b, min_max_ctx);
-                if (builder->flags & PAN_INDIRECT_DRAW_PRIMITIVE_RESTART) {
-                        builder->draw.restart_index =
-                                get_input_field(b, restart_index);
-                }
-        }
-
-        if (builder->index_min_max_search)
-                return;
-
-        builder->jobs.first_vertex_sysval = get_input_field(b, first_vertex_sysval);
-        builder->jobs.base_vertex_sysval = get_input_field(b, base_vertex_sysval);
-        builder->jobs.base_instance_sysval = get_input_field(b, base_instance_sysval);
-        builder->jobs.vertex_job = get_input_field(b, vertex_job);
-        builder->jobs.tiler_job = get_input_field(b, tiler_job);
-        builder->attribs.attrib_bufs = get_input_field(b, attrib_bufs);
-        builder->attribs.attribs = get_input_field(b, attribs);
-        builder->attribs.attrib_count = get_input_field(b, attrib_count);
-        builder->varyings.varying_bufs = get_input_field(b, varying_bufs);
-        builder->varyings.mem_ptr =
-                nir_local_variable_create(b->impl,
-                                          glsl_uint64_t_type(),
-                                          "var_mem_ptr");
-        nir_store_var(b, builder->varyings.mem_ptr,
-                      get_draw_ctx_field(builder, varying_mem), 3);
-}
-
-static void
-init_shader_builder(struct indirect_draw_shader_builder *builder,
-                    const struct panfrost_device *dev,
-                    unsigned flags, unsigned index_size,
-                    bool index_min_max_search)
-{
-        memset(builder, 0, sizeof(*builder));
-        builder->dev = dev;
-        builder->flags = flags;
-        builder->index_size = index_size;
-
-        builder->index_min_max_search = index_min_max_search;
-
-        if (index_min_max_search) {
-                builder->b =
-                        nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
-                                                       GENX(pan_shader_get_compiler_options)(),
-                                                       "indirect_draw_min_max_index(index_size=%d)",
-                                                       builder->index_size);
-        } else {
-                builder->b =
-                        nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
-                                                       GENX(pan_shader_get_compiler_options)(),
-                                                       "indirect_draw(index_size=%d%s%s%s%s)",
-                                                       builder->index_size,
-                                                       flags & PAN_INDIRECT_DRAW_HAS_PSIZ ?
-                                                       ",psiz" : "",
-                                                       flags & PAN_INDIRECT_DRAW_PRIMITIVE_RESTART ?
-                                                       ",primitive_restart" : "",
-                                                       flags & PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE ?
-                                                       ",update_primitive_size" : "",
-                                                       flags & PAN_INDIRECT_DRAW_IDVS ?
-                                                       ",idvs" : "");
-        }
-
-        extract_inputs(builder);
-}
-
-static void
-update_dcd(struct indirect_draw_shader_builder *builder,
-           nir_ssa_def *job_ptr,
-           unsigned draw_offset)
-{
-        nir_builder *b = &builder->b;
-        nir_ssa_def *draw_w01 =
-                load_global(b, get_address_imm(b, job_ptr, draw_offset + WORD(0)), 2, 32);
-        nir_ssa_def *draw_w0 = nir_channel(b, draw_w01, 0);
-
-        /* Update DRAW.{instance_size,offset_start} */
-        nir_ssa_def *instance_size =
-                nir_bcsel(b,
-                          nir_ult(b, builder->draw.instance_count, nir_imm_int(b, 2)),
-                          nir_imm_int(b, 0), builder->instance_size.packed);
-        draw_w01 = nir_vec2(b,
-                            nir_ior(b, nir_iand_imm(b, draw_w0, 0xffff),
-                                    nir_ishl(b, instance_size, nir_imm_int(b, 16))),
-                            builder->jobs.offset_start);
-        store_global(b, get_address_imm(b, job_ptr, draw_offset + WORD(0)),
-                     draw_w01, 2);
-}
-
-static void
-update_job(struct indirect_draw_shader_builder *builder, enum mali_job_type type)
-{
-        nir_builder *b = &builder->b;
-        nir_ssa_def *job_ptr =
-                type == MALI_JOB_TYPE_VERTEX ?
-                builder->jobs.vertex_job : builder->jobs.tiler_job;
-
-        /* Update the invocation words. */
-        store_global(b, get_address_imm(b, job_ptr, WORD(8)),
-                     builder->jobs.invocation, 2);
-
-        unsigned draw_offset =
-                type == MALI_JOB_TYPE_VERTEX ?
-                pan_section_offset(COMPUTE_JOB, DRAW) :
-                pan_section_offset(TILER_JOB, DRAW);
-        unsigned prim_offset = pan_section_offset(TILER_JOB, PRIMITIVE);
-        unsigned psiz_offset = pan_section_offset(TILER_JOB, PRIMITIVE_SIZE);
-        unsigned index_size = builder->index_size;
-
-        if (type == MALI_JOB_TYPE_TILER) {
-                /* Update PRIMITIVE.{base_vertex_offset,count} */
-                store_global(b,
-                             get_address_imm(b, job_ptr, prim_offset + WORD(1)),
-                             builder->jobs.base_vertex_offset, 1);
-                store_global(b,
-                             get_address_imm(b, job_ptr, prim_offset + WORD(3)),
-                             nir_iadd_imm(b, builder->draw.vertex_count, -1), 1);
-
-                if (index_size) {
-                        nir_ssa_def *addr =
-                                get_address_imm(b, job_ptr, prim_offset + WORD(4));
-                        nir_ssa_def *indices = load_global(b, addr, 1, 64);
-                        nir_ssa_def *offset =
-                                nir_imul_imm(b, builder->draw.vertex_start, index_size);
-
-                        indices = get_address(b, indices, offset);
-                        store_global(b, addr, indices, 2);
-                }
-
-                /* Update PRIMITIVE_SIZE.size_array */
-                if ((builder->flags & PAN_INDIRECT_DRAW_HAS_PSIZ) &&
-                    (builder->flags & PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE)) {
-                        store_global(b,
-                                     get_address_imm(b, job_ptr, psiz_offset + WORD(0)),
-                                     builder->varyings.psiz_ptr, 2);
-                }
-
-                /* Update DRAW.position */
-                store_global(b, get_address_imm(b, job_ptr, draw_offset + WORD(4)),
-                             builder->varyings.pos_ptr, 2);
-        }
-
-        update_dcd(builder, job_ptr, draw_offset);
-
-        if (builder->flags & PAN_INDIRECT_DRAW_IDVS) {
-                assert(type == MALI_JOB_TYPE_TILER);
-
-                update_dcd(builder, job_ptr,
-                           pan_section_offset(INDEXED_VERTEX_JOB, VERTEX_DRAW));
-        }
-}
-
-static void
-split_div(nir_builder *b, nir_ssa_def *div, nir_ssa_def **r_e, nir_ssa_def **d)
-{
-        /* TODO: Lower this 64bit div to something GPU-friendly */
-        nir_ssa_def *r = nir_imax(b, nir_ufind_msb(b, div), nir_imm_int(b, 0));
-        nir_ssa_def *div64 = nir_u2u64(b, div);
-        nir_ssa_def *half_div64 = nir_u2u64(b, nir_ushr_imm(b, div, 1));
-        nir_ssa_def *f0 = nir_iadd(b,
-                                   nir_ishl(b, nir_imm_int64(b, 1),
-                                            nir_iadd_imm(b, r, 32)),
-                                   half_div64);
-        nir_ssa_def *fi = nir_idiv(b, f0, div64);
-        nir_ssa_def *ff = nir_isub(b, f0, nir_imul(b, fi, div64));
-        nir_ssa_def *e = nir_bcsel(b, nir_ult(b, half_div64, ff),
-                                   nir_imm_int(b, 1 << 5), nir_imm_int(b, 0));
-        *d = nir_iand_imm(b, nir_u2u32(b, fi), ~(1 << 31));
-        *r_e = nir_ior(b, r, e);
-}
-
-static void
-update_vertex_attrib_buf(struct indirect_draw_shader_builder *builder,
-                         nir_ssa_def *attrib_buf_ptr,
-                         enum mali_attribute_type type,
-                         nir_ssa_def *div1,
-                         nir_ssa_def *div2)
-{
-        nir_builder *b = &builder->b;
-        unsigned type_mask = BITFIELD_MASK(6);
-        nir_ssa_def *w01 = load_global(b, attrib_buf_ptr, 2, 32);
-        nir_ssa_def *w0 = nir_channel(b, w01, 0);
-        nir_ssa_def *w1 = nir_channel(b, w01, 1);
-
-        /* Word 0 and 1 of the attribute descriptor contain the type,
-         * pointer and the the divisor exponent.
-         */
-        w0 = nir_iand_imm(b, nir_channel(b, w01, 0), ~type_mask);
-        w0 = nir_ior(b, w0, nir_imm_int(b, type));
-        w1 = nir_ior(b, w1, nir_ishl(b, div1, nir_imm_int(b, 24)));
-
-        store_global(b, attrib_buf_ptr, nir_vec2(b, w0, w1), 2);
-
-        if (type == MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR) {
-                /* If the divisor is not a power of two, the divisor numerator
-                 * is passed in word 1 of the continuation attribute (word 5
-                 * if we consider the attribute and its continuation as a
-                 * single attribute).
-                 */
-                assert(div2);
-                store_global(b, get_address_imm(b, attrib_buf_ptr, WORD(5)),
-                             div2, 1);
-        }
-}
-
-static void
-zero_attrib_buf_stride(struct indirect_draw_shader_builder *builder,
-                       nir_ssa_def *attrib_buf_ptr)
-{
-        /* Stride is an unadorned 32-bit uint at word 2 */
-        nir_builder *b = &builder->b;
-        store_global(b, get_address_imm(b, attrib_buf_ptr, WORD(2)),
-                        nir_imm_int(b, 0), 1);
-}
-
-static void
-adjust_attrib_offset(struct indirect_draw_shader_builder *builder,
-                     nir_ssa_def *attrib_ptr, nir_ssa_def *attrib_buf_ptr,
-                     nir_ssa_def *instance_div)
-{
-        nir_builder *b = &builder->b;
-        nir_ssa_def *zero = nir_imm_int(b, 0);
-        nir_ssa_def *two = nir_imm_int(b, 2);
-        nir_ssa_def *sub_cur_offset =
-                nir_iand(b, nir_ine(b, builder->jobs.offset_start, zero),
-                         nir_uge(b, builder->draw.instance_count, two));
-
-        nir_ssa_def *add_base_inst_offset =
-                nir_iand(b, nir_ine(b, builder->draw.start_instance, zero),
-                         nir_ine(b, instance_div, zero));
-
-        IF (nir_ior(b, sub_cur_offset, add_base_inst_offset)) {
-                nir_ssa_def *offset =
-                        load_global(b, get_address_imm(b, attrib_ptr, WORD(1)), 1, 32);
-                nir_ssa_def *stride =
-                        load_global(b, get_address_imm(b, attrib_buf_ptr, WORD(2)), 1, 32);
-
-                /* Per-instance data needs to be offset in response to a
-                 * delayed start in an indexed draw.
-                 */
-
-                IF (add_base_inst_offset) {
-                        offset = nir_iadd(b, offset,
-                                          nir_idiv(b,
-                                                   nir_imul(b, stride,
-                                                            builder->draw.start_instance),
-                                                   instance_div));
-                } ENDIF
-
-                IF (sub_cur_offset) {
-                        offset = nir_isub(b, offset,
-                                          nir_imul(b, stride,
-                                                   builder->jobs.offset_start));
-                } ENDIF
-
-                store_global(b, get_address_imm(b, attrib_ptr, WORD(1)),
-                             offset, 1);
-        } ENDIF
-}
-
-/* x is power of two or zero <===> x has 0 (zero) or 1 (POT) bits set */
-
-static nir_ssa_def *
-nir_is_power_of_two_or_zero(nir_builder *b, nir_ssa_def *x)
-{
-        return nir_ult(b, nir_bit_count(b, x), nir_imm_int(b, 2));
-}
-
-/* Based on panfrost_emit_vertex_data() */
-
-static void
-update_vertex_attribs(struct indirect_draw_shader_builder *builder)
-{
-        nir_builder *b = &builder->b;
-        nir_variable *attrib_idx_var =
-                nir_local_variable_create(b->impl, glsl_uint_type(),
-                                          "attrib_idx");
-        nir_store_var(b, attrib_idx_var, nir_imm_int(b, 0), 1);
-
-#if PAN_ARCH <= 5
-        nir_ssa_def *single_instance =
-                nir_ult(b, builder->draw.instance_count, nir_imm_int(b, 2));
-#endif
-
-        LOOP {
-                nir_ssa_def *attrib_idx = nir_load_var(b, attrib_idx_var);
-                IF (nir_uge(b, attrib_idx, builder->attribs.attrib_count))
-                        BREAK;
-                ENDIF
-
-                nir_ssa_def *attrib_buf_ptr =
-                         get_address(b, builder->attribs.attrib_bufs,
-                                     nir_imul_imm(b, attrib_idx,
-                                                  2 * pan_size(ATTRIBUTE_BUFFER)));
-                nir_ssa_def *attrib_ptr =
-                         get_address(b, builder->attribs.attribs,
-                                     nir_imul_imm(b, attrib_idx,
-                                                  pan_size(ATTRIBUTE)));
-
-                nir_ssa_def *r_e, *d;
-
-#if PAN_ARCH <= 5
-                IF (nir_ieq_imm(b, attrib_idx, PAN_VERTEX_ID)) {
-                        nir_ssa_def *r_p =
-                                nir_bcsel(b, single_instance,
-                                          nir_imm_int(b, 0x9f),
-                                          builder->instance_size.packed);
-
-                        store_global(b,
-                                     get_address_imm(b, attrib_buf_ptr, WORD(4)),
-                                     nir_ishl(b, r_p, nir_imm_int(b, 24)), 1);
-
-                        nir_store_var(b, attrib_idx_var,
-                                      nir_iadd_imm(b, attrib_idx, 1), 1);
-                        CONTINUE;
-                } ENDIF
-
-                IF (nir_ieq_imm(b, attrib_idx, PAN_INSTANCE_ID)) {
-                        split_div(b, builder->instance_size.padded,
-                                  &r_e, &d);
-                        nir_ssa_def *default_div =
-                                nir_ior(b, single_instance,
-                                        nir_ult(b,
-                                                builder->instance_size.padded,
-                                                nir_imm_int(b, 2)));
-                        r_e = nir_bcsel(b, default_div,
-                                        nir_imm_int(b, 0x3f), r_e);
-                        d = nir_bcsel(b, default_div,
-                                      nir_imm_int(b, (1u << 31) - 1), d);
-                        store_global(b,
-                                     get_address_imm(b, attrib_buf_ptr, WORD(1)),
-                                     nir_vec2(b, nir_ishl(b, r_e, nir_imm_int(b, 24)), d),
-                                     2);
-                        nir_store_var(b, attrib_idx_var,
-                                      nir_iadd_imm(b, attrib_idx, 1), 1);
-                        CONTINUE;
-                } ENDIF
-#endif
-
-                nir_ssa_def *instance_div =
-                        load_global(b, get_address_imm(b, attrib_buf_ptr, WORD(7)), 1, 32);
-
-                nir_ssa_def *div = nir_imul(b, instance_div, builder->instance_size.padded);
-
-                nir_ssa_def *multi_instance =
-                        nir_uge(b, builder->draw.instance_count, nir_imm_int(b, 2));
-
-                IF (nir_ine(b, div, nir_imm_int(b, 0))) {
-                        IF (multi_instance) {
-                                IF (nir_is_power_of_two_or_zero(b, div)) {
-                                        nir_ssa_def *exp =
-                                                nir_imax(b, nir_ufind_msb(b, div),
-                                                         nir_imm_int(b, 0));
-                                        update_vertex_attrib_buf(builder, attrib_buf_ptr,
-                                                                 MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR,
-                                                                 exp, NULL);
-                                } ELSE {
-                                        split_div(b, div, &r_e, &d);
-                                        update_vertex_attrib_buf(builder, attrib_buf_ptr,
-                                                                 MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR,
-                                                                 r_e, d);
-                                } ENDIF
-                        } ELSE {
-                                /* Single instance with a non-0 divisor: all
-                                 * accesses should point to attribute 0 */
-                                zero_attrib_buf_stride(builder, attrib_buf_ptr);
-                        } ENDIF
-
-                        adjust_attrib_offset(builder, attrib_ptr, attrib_buf_ptr, instance_div);
-                } ELSE IF (multi_instance) {
-                        update_vertex_attrib_buf(builder, attrib_buf_ptr,
-                                        MALI_ATTRIBUTE_TYPE_1D_MODULUS,
-                                        builder->instance_size.packed, NULL);
-                } ENDIF ENDIF
-
-                nir_store_var(b, attrib_idx_var, nir_iadd_imm(b, attrib_idx, 1), 1);
-        }
-}
-
-static nir_ssa_def *
-update_varying_buf(struct indirect_draw_shader_builder *builder,
-                   nir_ssa_def *varying_buf_ptr,
-                   nir_ssa_def *vertex_count)
-{
-        nir_builder *b = &builder->b;
-
-        nir_ssa_def *stride =
-                load_global(b, get_address_imm(b, varying_buf_ptr, WORD(2)), 1, 32);
-        nir_ssa_def *size = nir_imul(b, stride, vertex_count);
-        nir_ssa_def *aligned_size =
-                nir_iand_imm(b, nir_iadd_imm(b, size, 63), ~63);
-        nir_ssa_def *var_mem_ptr =
-                nir_load_var(b, builder->varyings.mem_ptr);
-        nir_ssa_def *w0 =
-                nir_ior(b, nir_unpack_64_2x32_split_x(b, var_mem_ptr),
-                        nir_imm_int(b, MALI_ATTRIBUTE_TYPE_1D));
-        nir_ssa_def *w1 = nir_unpack_64_2x32_split_y(b, var_mem_ptr);
-        store_global(b, get_address_imm(b, varying_buf_ptr, WORD(0)),
-                     nir_vec4(b, w0, w1, stride, size), 4);
-
-        nir_store_var(b, builder->varyings.mem_ptr,
-                      get_address(b, var_mem_ptr, aligned_size), 3);
-
-        return var_mem_ptr;
-}
-
-/* Based on panfrost_emit_varying_descriptor() */
-
-static void
-update_varyings(struct indirect_draw_shader_builder *builder)
-{
-        nir_builder *b = &builder->b;
-        nir_ssa_def *vertex_count =
-                nir_imul(b, builder->instance_size.padded,
-                         builder->draw.instance_count);
-        nir_ssa_def *buf_ptr =
-                get_address_imm(b, builder->varyings.varying_bufs,
-                                PAN_VARY_GENERAL *
-                                pan_size(ATTRIBUTE_BUFFER));
-        update_varying_buf(builder, buf_ptr, vertex_count);
-
-        buf_ptr = get_address_imm(b, builder->varyings.varying_bufs,
-                                  PAN_VARY_POSITION *
-                                  pan_size(ATTRIBUTE_BUFFER));
-        builder->varyings.pos_ptr =
-                update_varying_buf(builder, buf_ptr, vertex_count);
-
-        if (builder->flags & PAN_INDIRECT_DRAW_HAS_PSIZ) {
-                buf_ptr = get_address_imm(b, builder->varyings.varying_bufs,
-                                          PAN_VARY_PSIZ *
-                                          pan_size(ATTRIBUTE_BUFFER));
-                builder->varyings.psiz_ptr =
-                        update_varying_buf(builder, buf_ptr, vertex_count);
-        }
-
-        set_draw_ctx_field(builder, varying_mem,
-                           nir_load_var(b, builder->varyings.mem_ptr));
-}
-
-/* Based on panfrost_pack_work_groups_compute() */
-
-static void
-get_invocation(struct indirect_draw_shader_builder *builder)
-{
-        nir_builder *b = &builder->b;
-        nir_ssa_def *one = nir_imm_int(b, 1);
-        nir_ssa_def *max_vertex =
-                nir_usub_sat(b, builder->instance_size.raw, one);
-        nir_ssa_def *max_instance =
-                nir_usub_sat(b, builder->draw.instance_count, one);
-        nir_ssa_def *split =
-                nir_bcsel(b, nir_ieq_imm(b, max_instance, 0),
-                          nir_imm_int(b, 32),
-                          nir_iadd_imm(b, nir_ufind_msb(b, max_vertex), 1));
-
-        builder->jobs.invocation =
-                nir_vec2(b,
-                         nir_ior(b, max_vertex,
-                                 nir_ishl(b, max_instance, split)),
-                         nir_ior(b, nir_ishl(b, split, nir_imm_int(b, 22)),
-                                 nir_imm_int(b, 2 << 28)));
-}
-
-static nir_ssa_def *
-nir_align_pot(nir_builder *b, nir_ssa_def *val, unsigned pot)
-{
-        assert(pot != 0 && util_is_power_of_two_or_zero(pot));
-
-        return nir_iand_imm(b, nir_iadd_imm(b, val, pot - 1), ~(pot - 1));
-}
-
-/* Based on panfrost_padded_vertex_count() */
-
-static nir_ssa_def *
-get_padded_count(nir_builder *b, nir_ssa_def *val, nir_ssa_def **packed)
-{
-        nir_ssa_def *one = nir_imm_int(b, 1);
-        nir_ssa_def *zero = nir_imm_int(b, 0);
-        nir_ssa_def *eleven = nir_imm_int(b, 11);
-        nir_ssa_def *four = nir_imm_int(b, 4);
-
-        nir_ssa_def *exp =
-                nir_usub_sat(b, nir_imax(b, nir_ufind_msb(b, val), zero), four);
-        nir_ssa_def *base = nir_ushr(b, val, exp);
-
-        base = nir_iadd(b, base,
-                        nir_bcsel(b, nir_ine(b, val, nir_ishl(b, base, exp)), one, zero));
-
-        nir_ssa_def *rshift = nir_imax(b, nir_find_lsb(b, base), zero);
-        exp = nir_iadd(b, exp, rshift);
-        base = nir_ushr(b, base, rshift);
-        base = nir_iadd(b, base, nir_bcsel(b, nir_uge(b, base, eleven), one, zero));
-        rshift = nir_imax(b, nir_find_lsb(b, base), zero);
-        exp = nir_iadd(b, exp, rshift);
-        base = nir_ushr(b, base, rshift);
-
-        *packed = nir_ior(b, exp,
-                          nir_ishl(b, nir_ushr_imm(b, base, 1), nir_imm_int(b, 5)));
-        return nir_ishl(b, base, exp);
-}
-
-static void
-update_jobs(struct indirect_draw_shader_builder *builder)
-{
-        get_invocation(builder);
-
-        if (!(builder->flags & PAN_INDIRECT_DRAW_IDVS))
-                update_job(builder, MALI_JOB_TYPE_VERTEX);
-
-        update_job(builder, MALI_JOB_TYPE_TILER);
-}
-
-
-static void
-set_null_job(struct indirect_draw_shader_builder *builder,
-             nir_ssa_def *job_ptr)
-{
-        nir_builder *b = &builder->b;
-        nir_ssa_def *w4 = get_address_imm(b, job_ptr, WORD(4));
-        nir_ssa_def *val = load_global(b, w4, 1, 32);
-
-        /* Set job type to NULL (AKA NOOP) */
-        val = nir_ior(b, nir_iand_imm(b, val, 0xffffff01),
-                      nir_imm_int(b, MALI_JOB_TYPE_NULL << 1));
-        store_global(b, w4, val, 1);
-}
-
-static void
-get_instance_size(struct indirect_draw_shader_builder *builder)
-{
-        nir_builder *b = &builder->b;
-
-        if (!builder->index_size) {
-                builder->jobs.base_vertex_offset = nir_imm_int(b, 0);
-                builder->jobs.offset_start = builder->draw.vertex_start;
-                builder->instance_size.raw = builder->draw.vertex_count;
-                return;
-        }
-
-        unsigned index_size = builder->index_size;
-        nir_ssa_def *min = get_min_max_ctx_field(builder, min);
-        nir_ssa_def *max = get_min_max_ctx_field(builder, max);
-
-        /* We handle unaligned indices here to avoid the extra complexity in
-         * the min/max search job.
-         */
-        if (builder->index_size < 4) {
-                nir_variable *min_var =
-                        nir_local_variable_create(b->impl, glsl_uint_type(), "min");
-                nir_store_var(b, min_var, min, 1);
-                nir_variable *max_var =
-                        nir_local_variable_create(b->impl, glsl_uint_type(), "max");
-                nir_store_var(b, max_var, max, 1);
-
-                nir_ssa_def *base =
-                        get_address(b, builder->draw.index_buf,
-                                    nir_imul_imm(b, builder->draw.vertex_start, index_size));
-                nir_ssa_def *offset = nir_iand_imm(b, nir_unpack_64_2x32_split_x(b, base), 3);
-                nir_ssa_def *end =
-                        nir_iadd(b, offset,
-                                 nir_imul_imm(b, builder->draw.vertex_count, index_size));
-                nir_ssa_def *aligned_end = nir_iand_imm(b, end, ~3);
-                unsigned shift = index_size * 8;
-                unsigned mask = (1 << shift) - 1;
-
-                base = nir_iand(b, base, nir_imm_int64(b, ~3ULL));
-
-                /* Unaligned start offset, we need to ignore any data that's
-                 * outside the requested range. We also handle ranges that are
-                 * covering less than 2 words here.
-                 */
-                IF (nir_ior(b, nir_ine(b, offset, nir_imm_int(b, 0)), nir_ieq(b, aligned_end, nir_imm_int(b, 0)))) {
-                        min = nir_load_var(b, min_var);
-                        max = nir_load_var(b, max_var);
-
-                        nir_ssa_def *val = load_global(b, base, 1, 32);
-                        for (unsigned i = 0; i < sizeof(uint32_t); i += index_size) {
-                                nir_ssa_def *oob =
-                                        nir_ior(b,
-                                                nir_ult(b, nir_imm_int(b, i), offset),
-                                                nir_uge(b, nir_imm_int(b, i), end));
-                                nir_ssa_def *data = nir_iand_imm(b, val, mask);
-
-                                min = nir_umin(b, min,
-                                               nir_bcsel(b, oob, nir_imm_int(b, UINT32_MAX), data));
-                                max = nir_umax(b, max,
-                                               nir_bcsel(b, oob, nir_imm_int(b, 0), data));
-                                val = nir_ushr_imm(b, val, shift);
-                        }
-
-                        nir_store_var(b, min_var, min, 1);
-                        nir_store_var(b, max_var, max, 1);
-                } ENDIF
-
-                nir_ssa_def *remaining = nir_isub(b, end, aligned_end);
-
-                /* The last word contains less than 4bytes of data, we need to
-                 * discard anything falling outside the requested range.
-                 */
-                IF (nir_iand(b, nir_ine(b, end, aligned_end), nir_ine(b, aligned_end, nir_imm_int(b, 0)))) {
-                        min = nir_load_var(b, min_var);
-                        max = nir_load_var(b, max_var);
-
-                        nir_ssa_def *val = load_global(b, get_address(b, base, aligned_end), 1, 32);
-                        for (unsigned i = 0; i < sizeof(uint32_t); i += index_size) {
-                                nir_ssa_def *oob = nir_uge(b, nir_imm_int(b, i), remaining);
-                                nir_ssa_def *data = nir_iand_imm(b, val, mask);
-
-                                min = nir_umin(b, min,
-                                               nir_bcsel(b, oob, nir_imm_int(b, UINT32_MAX), data));
-                                max = nir_umax(b, max,
-                                               nir_bcsel(b, oob, nir_imm_int(b, 0), data));
-                                val = nir_ushr_imm(b, val, shift);
-                        }
-
-                        nir_store_var(b, min_var, min, 1);
-                        nir_store_var(b, max_var, max, 1);
-                } ENDIF
-
-                min = nir_load_var(b, min_var);
-                max = nir_load_var(b, max_var);
-        }
-
-        builder->jobs.base_vertex_offset = nir_ineg(b, min);
-        builder->jobs.offset_start = nir_iadd(b, min, builder->draw.index_bias);
-        builder->instance_size.raw = nir_iadd_imm(b, nir_usub_sat(b, max, min), 1);
-}
-
-/* Patch a draw sequence.
- *
- * Emits the NIR body of the "patch" variant of the indirect draw shader. The
- * draw parameters are fetched from builder->draw.draw_buf with
- * get_indexed_draw_field() or get_draw_field(), depending on whether
- * builder->index_size is non-zero. If vertex_count * instance_count is zero
- * the jobs are turned into null jobs; otherwise the instance size is computed
- * and the varyings, jobs, vertex attributes and the optional
- * first_vertex/base_vertex/base_instance sysvals are updated.
- */
-
-static void
-patch(struct indirect_draw_shader_builder *builder)
-{
-        unsigned index_size = builder->index_size;
-        nir_builder *b = &builder->b;
-
-        nir_ssa_def *draw_ptr = builder->draw.draw_buf;
-
-        if (index_size) {
-                builder->draw.vertex_count = get_indexed_draw_field(b, draw_ptr, count);
-                builder->draw.start_instance = get_indexed_draw_field(b, draw_ptr, start_instance);
-                builder->draw.instance_count =
-                        get_indexed_draw_field(b, draw_ptr, instance_count);
-                builder->draw.vertex_start = get_indexed_draw_field(b, draw_ptr, start);
-                builder->draw.index_bias = get_indexed_draw_field(b, draw_ptr, index_bias);
-        } else { /* no index_bias is fetched for non-indexed draws */
-                builder->draw.vertex_count = get_draw_field(b, draw_ptr, count);
-                builder->draw.start_instance = get_draw_field(b, draw_ptr, start_instance);
-                builder->draw.instance_count = get_draw_field(b, draw_ptr, instance_count);
-                builder->draw.vertex_start = get_draw_field(b, draw_ptr, start);
-        }
-
-        assert(builder->draw.vertex_count->num_components); /* sanity check on the fetched SSA value */
-
-        nir_ssa_def *num_vertices =
-                nir_imul(b, builder->draw.vertex_count, builder->draw.instance_count);
-
-        IF (nir_ieq(b, num_vertices, nir_imm_int(b, 0))) {
-                /* If there's nothing to draw, turn the vertex/tiler jobs into
-                 * null jobs. The vertex job is left alone when the
-                 * PAN_INDIRECT_DRAW_IDVS flag is set.
-                 */
-                if (!(builder->flags & PAN_INDIRECT_DRAW_IDVS))
-                        set_null_job(builder, builder->jobs.vertex_job);
-
-                set_null_job(builder, builder->jobs.tiler_job);
-        } ELSE {
-                get_instance_size(builder);
-
-                nir_ssa_def *count = builder->instance_size.raw;
-
-                /* IDVS requires padding to a multiple of 4 */
-                if (builder->flags & PAN_INDIRECT_DRAW_IDVS)
-                        count = nir_align_pot(b, count, 4);
-
-                builder->instance_size.padded =
-                        get_padded_count(b, count,
-                                         &builder->instance_size.packed);
-
-                update_varyings(builder);
-                update_jobs(builder);
-                update_vertex_attribs(builder);
-
-                IF (nir_ine(b, builder->jobs.first_vertex_sysval, nir_imm_int64(b, 0))) { /* store skipped when the sysval address is 0 */
-                        store_global(b, builder->jobs.first_vertex_sysval,
-                                     builder->jobs.offset_start, 1);
-                } ENDIF
-
-                IF (nir_ine(b, builder->jobs.base_vertex_sysval, nir_imm_int64(b, 0))) { /* store skipped when the sysval address is 0 */
-                        store_global(b, builder->jobs.base_vertex_sysval,
-                                     index_size ?
-                                     builder->draw.index_bias :
-                                     nir_imm_int(b, 0), /* base vertex is 0 for non-indexed draws */
-                                     1);
-                } ENDIF
-
-                IF (nir_ine(b, builder->jobs.base_instance_sysval, nir_imm_int64(b, 0))) { /* store skipped when the sysval address is 0 */
-                        store_global(b, builder->jobs.base_instance_sysval,
-                                     builder->draw.start_instance, 1);
-                } ENDIF
-        } ENDIF
-}
-
-/* Search the min/max index in the range covered by the indirect draw call.
- *
- * Emits the NIR body of the min/max search variant of the shader. Each
- * invocation starts at its own 32-bit word of the index buffer (the word
- * offset is derived from component 0 of the global invocation ID) and then
- * advances by MIN_MAX_JOBS words per loop iteration, tracking the smallest
- * and largest index it has seen in local variables. When
- * builder->draw.restart_index is set, indices equal to it are ignored. Only
- * the 4-byte-aligned part of the range is scanned here. The per-invocation
- * results are handed to update_min()/update_max().
- *
- * NOTE(review): count/start are fetched with get_draw_field() rather than
- * get_indexed_draw_field(), although this variant is only requested for
- * indexed draws -- presumably both layouts agree on these two fields; confirm.
- */
-
-static void
-get_index_min_max(struct indirect_draw_shader_builder *builder)
-{
-        nir_ssa_def *restart_index = builder->draw.restart_index;
-        unsigned index_size = builder->index_size;
-        nir_builder *b = &builder->b;
-
-        nir_ssa_def *draw_ptr = builder->draw.draw_buf;
-
-        builder->draw.vertex_count = get_draw_field(b, draw_ptr, count);
-        builder->draw.vertex_start = get_draw_field(b, draw_ptr, start);
-
-        nir_ssa_def *thread_id = nir_channel(b, nir_load_global_invocation_id(b, 32), 0);
-        nir_variable *min_var =
-                nir_local_variable_create(b->impl, glsl_uint_type(), "min");
-        nir_store_var(b, min_var, nir_imm_int(b, UINT32_MAX), 1); /* identity element for umin */
-        nir_variable *max_var =
-                nir_local_variable_create(b->impl, glsl_uint_type(), "max");
-        nir_store_var(b, max_var, nir_imm_int(b, 0), 1); /* identity element for umax */
-
-        nir_ssa_def *base =
-                get_address(b, builder->draw.index_buf,
-                            nir_imul_imm(b, builder->draw.vertex_start, index_size));
-
-
-        nir_ssa_def *start = nir_iand_imm(b, nir_unpack_64_2x32_split_x(b, base), 3); /* low two bits of the first index address */
-        nir_ssa_def *end =
-                nir_iadd(b, start, nir_imul_imm(b, builder->draw.vertex_count, index_size));
-
-        base = nir_iand(b, base, nir_imm_int64(b, ~3ULL)); /* round base down to a 4-byte boundary */
-
-        /* Align on 4 bytes, non-aligned indices are handled in the indirect draw job. */
-        start = nir_iand_imm(b, nir_iadd_imm(b, start, 3), ~3);
-        end = nir_iand_imm(b, end, ~3);
-
-        /* Add the job offset. */
-        start = nir_iadd(b, start, nir_imul_imm(b, thread_id, sizeof(uint32_t)));
-
-        nir_variable *offset_var =
-                nir_local_variable_create(b->impl, glsl_uint_type(), "offset");
-        nir_store_var(b, offset_var, start, 1);
-
-        LOOP {
-                nir_ssa_def *offset = nir_load_var(b, offset_var);
-                IF (nir_uge(b, offset, end))
-                        BREAK;
-                ENDIF
-
-                nir_ssa_def *val = load_global(b, get_address(b, base, offset), 1, 32);
-                nir_ssa_def *old_min = nir_load_var(b, min_var);
-                nir_ssa_def *old_max = nir_load_var(b, max_var);
-                nir_ssa_def *new_min;
-                nir_ssa_def *new_max;
-
-                /* TODO: use 8/16 bit arithmetic when index_size < 4. */
-                for (unsigned i = 0; i < 4; i += index_size) {
-                        nir_ssa_def *data = nir_ushr_imm(b, val, i * 8);
-                        data = nir_iand_imm(b, data, (1ULL << (index_size * 8)) - 1); /* keep a single index_size-byte index */
-                        new_min = nir_umin(b, old_min, data);
-                        new_max = nir_umax(b, old_max, data);
-                        if (restart_index) { /* keep the previous min/max when data is the restart index */
-                                new_min = nir_bcsel(b, nir_ine(b, restart_index, data), new_min, old_min);
-                                new_max = nir_bcsel(b, nir_ine(b, restart_index, data), new_max, old_max);
-                        }
-                        old_min = new_min;
-                        old_max = new_max;
-                }
-
-                nir_store_var(b, min_var, new_min, 1);
-                nir_store_var(b, max_var, new_max, 1);
-                nir_store_var(b, offset_var,
-                              nir_iadd_imm(b, offset, MIN_MAX_JOBS * sizeof(uint32_t)), 1);
-        }
-
-        IF (nir_ult(b, start, end)) /* true only if the loop above scanned at least one word */
-                update_min(builder, nir_load_var(b, min_var));
-                update_max(builder, nir_load_var(b, max_var));
-        ENDIF
-}
-
-/* Map a (flags, index size, variant) triple to a slot in the per-device
- * shader table.
- *
- * Min/max search variants are numbered from one of the two
- * PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX* bases (chosen by the primitive
- * restart flag) plus log2 of the index size. Patch variants reuse the flag
- * bits, with the index-size field replaced by log2(index_size) + 1, or left
- * at 0 for non-indexed draws.
- */
-static unsigned
-get_shader_id(unsigned flags, unsigned index_size, bool index_min_max_search)
-{
-        if (index_min_max_search) {
-                unsigned first_id;
-
-                if (flags & PAN_INDIRECT_DRAW_PRIMITIVE_RESTART)
-                        first_id = PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX_PRIM_RESTART;
-                else
-                        first_id = PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX;
-
-                return first_id + util_logbase2(index_size);
-        }
-
-        unsigned id = flags & PAN_INDIRECT_DRAW_FLAGS_MASK &
-                      ~PAN_INDIRECT_DRAW_INDEX_SIZE_MASK;
-
-        if (index_size)
-                id |= util_logbase2(index_size) + 1;
-
-        return id;
-}
-
-/* Build, compile and publish one variant of the indirect draw shader.
- *
- * The NIR is generated by get_index_min_max() or patch() depending on
- * index_min_max_search, compiled with GENX(pan_shader_compile), and -- unless
- * another thread published the same variant in the meantime -- uploaded to
- * the binary pool and described by a RENDERER_STATE stored in the slot
- * returned by get_shader_id(). The rsd check and the upload happen under
- * dev->indirect_draw_shaders.lock.
- *
- * The compiled binary and the NIR shader are released on every path.
- */
-static void
-create_indirect_draw_shader(struct panfrost_device *dev,
-                            unsigned flags, unsigned index_size,
-                            bool index_min_max_search)
-{
-        assert(flags < PAN_INDIRECT_DRAW_NUM_SHADERS);
-        struct indirect_draw_shader_builder builder;
-        init_shader_builder(&builder, dev, flags, index_size, index_min_max_search);
-
-        nir_builder *b = &builder.b;
-
-        if (index_min_max_search)
-                get_index_min_max(&builder);
-        else
-                patch(&builder);
-
-        struct panfrost_compile_inputs inputs = {
-                .gpu_id = dev->gpu_id,
-                .fixed_sysval_ubo = -1,
-                .no_ubo_to_push = true,
-        };
-        struct pan_shader_info shader_info;
-        struct util_dynarray binary;
-
-        util_dynarray_init(&binary, NULL);
-        GENX(pan_shader_compile)(b->shader, &inputs, &binary, &shader_info);
-
-        assert(!shader_info.tls_size);
-        assert(!shader_info.wls_size);
-        assert(!shader_info.sysvals.sysval_count);
-
-        /* All inputs are passed as push constants, one 32-bit word each. */
-        shader_info.push.count =
-                DIV_ROUND_UP(sizeof(struct indirect_draw_inputs), 4);
-
-        unsigned shader_id = get_shader_id(flags, index_size, index_min_max_search);
-        struct pan_indirect_draw_shader *draw_shader =
-                &dev->indirect_draw_shaders.shaders[shader_id];
-        void *state = dev->indirect_draw_shaders.states->ptr.cpu +
-                      (shader_id * pan_size(RENDERER_STATE));
-
-        pthread_mutex_lock(&dev->indirect_draw_shaders.lock);
-        if (!draw_shader->rsd) {
-                mali_ptr address =
-                        pan_pool_upload_aligned(dev->indirect_draw_shaders.bin_pool,
-                                                binary.data, binary.size,
-                                                PAN_ARCH >= 6 ? 128 : 64);
-
-                pan_pack(state, RENDERER_STATE, cfg) {
-                        pan_shader_prepare_rsd(&shader_info, address, &cfg);
-                }
-
-                draw_shader->push = shader_info.push;
-                draw_shader->rsd = dev->indirect_draw_shaders.states->ptr.gpu +
-                                   (shader_id * pan_size(RENDERER_STATE));
-        }
-        pthread_mutex_unlock(&dev->indirect_draw_shaders.lock);
-
-        /* Release the CPU copy of the binary whether or not it was uploaded:
-         * freeing it only inside the branch above leaked it whenever another
-         * thread had already published this variant.
-         */
-        util_dynarray_fini(&binary);
-
-        ralloc_free(b->shader);
-}
-
-/* Return the GPU address of the renderer state for the requested shader
- * variant, creating the variant first if its slot has no rsd yet.
- */
-static mali_ptr
-get_renderer_state(struct panfrost_device *dev, unsigned flags,
-                   unsigned index_size, bool index_min_max_search)
-{
-        unsigned id = get_shader_id(flags, index_size, index_min_max_search);
-        struct pan_indirect_draw_shader *shader =
-                &dev->indirect_draw_shaders.shaders[id];
-
-        /* Fast path: the variant has already been published. */
-        if (shader->rsd)
-                return shader->rsd;
-
-        create_indirect_draw_shader(dev, flags, index_size,
-                                    index_min_max_search);
-        assert(shader->rsd);
-
-        return shader->rsd;
-}
-
-/* Return the GPU address of the local storage descriptor, which lives in the
- * states BO right after the PAN_INDIRECT_DRAW_NUM_SHADERS renderer states.
- */
-static mali_ptr
-get_tls(const struct panfrost_device *dev)
-{
-        mali_ptr states_gpu = dev->indirect_draw_shaders.states->ptr.gpu;
-
-        return states_gpu +
-               (PAN_INDIRECT_DRAW_NUM_SHADERS * pan_size(RENDERER_STATE));
-}
-
-/* Create, on first use, the BOs shared by all indirect draws on a device:
- * the "states" BO (renderer states followed by one local storage descriptor)
- * and the varying heap. The check and the allocations are done under
- * dev->indirect_draw_shaders.lock; later calls find the states BO already set
- * and do nothing.
- */
-static void
-panfrost_indirect_draw_alloc_deps(struct panfrost_device *dev)
-{
-        pthread_mutex_lock(&dev->indirect_draw_shaders.lock);
-
-        if (!dev->indirect_draw_shaders.states) {
-                /* One renderer state per shader slot, then the local
-                 * storage descriptor.
-                 */
-                unsigned rsd_array_size = PAN_INDIRECT_DRAW_NUM_SHADERS *
-                                          pan_size(RENDERER_STATE);
-                unsigned state_bo_size = rsd_array_size +
-                                         pan_size(LOCAL_STORAGE);
-
-                dev->indirect_draw_shaders.states =
-                        panfrost_bo_create(dev, state_bo_size, 0,
-                                           "Indirect draw states");
-
-                /* The thread storage descriptor never changes, so it is
-                 * packed once here.
-                 */
-                void *tsd = dev->indirect_draw_shaders.states->ptr.cpu +
-                            rsd_array_size;
-                pan_pack(tsd, LOCAL_STORAGE, ls) {
-                        ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
-                }
-
-                /* FIXME: this requests 512M of growable memory, so only the
-                 * part that is really used gets backed, but:
-                 * - allocation happens 2M at a time, which might be more
-                 *   than we actually need
-                 * - the memory stays attached to the device to speed up
-                 *   subsequent indirect draws, which also means it is never
-                 *   shrunk
-                 */
-                dev->indirect_draw_shaders.varying_heap =
-                        panfrost_bo_create(dev, 512 * 1024 * 1024,
-                                           PAN_BO_INVISIBLE | PAN_BO_GROWABLE,
-                                           "Indirect draw varying heap");
-        }
-
-        pthread_mutex_unlock(&dev->indirect_draw_shaders.lock);
-}
-
-/* Queue the compute job that scans the index buffer for its min/max index.
- *
- * Returns 0 without emitting anything when draw_info->index_size is 0;
- * otherwise returns whatever panfrost_add_job() returns for the new job.
- * The inputs are uploaded as push uniforms; draw_ctx is not used here.
- */
-static unsigned
-panfrost_emit_index_min_max_search(struct pan_pool *pool,
-                                   struct pan_scoreboard *scoreboard,
-                                   const struct pan_indirect_draw_info *draw_info,
-                                   const struct indirect_draw_inputs *inputs,
-                                   struct indirect_draw_context *draw_ctx)
-{
-        /* Non-indexed draws have no index buffer to scan. */
-        if (draw_info->index_size == 0)
-                return 0;
-
-        mali_ptr search_rsd =
-                get_renderer_state(pool->dev, draw_info->flags,
-                                   draw_info->index_size, true);
-        struct panfrost_ptr search_job =
-                pan_pool_alloc_desc(pool, COMPUTE_JOB);
-
-        /* A single workgroup of MIN_MAX_JOBS invocations. */
-        void *invocation_section =
-                pan_section_ptr(search_job.cpu, COMPUTE_JOB, INVOCATION);
-        panfrost_pack_work_groups_compute(invocation_section,
-                                          1, 1, 1, MIN_MAX_JOBS, 1, 1,
-                                          false, false);
-
-        pan_section_pack(search_job.cpu, COMPUTE_JOB, PARAMETERS, params) {
-                params.job_task_split = 7;
-        }
-
-        pan_section_pack(search_job.cpu, COMPUTE_JOB, DRAW, draw) {
-                draw.state = search_rsd;
-                draw.thread_storage = get_tls(pool->dev);
-                draw.push_uniforms =
-                        pan_pool_upload_aligned(pool, inputs, sizeof(*inputs), 16);
-        }
-
-        return panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
-                                false, false, 0, 0, &search_job, false);
-}
-
-/* Emit the compute job(s) that patch an indirect draw on the GPU.
- *
- * pool:      pool used for the job descriptors, the draw/min-max contexts and
- *            the push uniforms.
- * draw_info: addresses and parameters of the draw to patch.
- * ctx:       in/out draw context. When ctx->cpu is NULL on entry a new
- *            context is allocated from pool, initialized with the address of
- *            the varying heap and returned through *ctx; otherwise the
- *            caller's context is reused as is.
- *
- * For indexed draws a min/max search job is emitted first and the patch job
- * depends on it locally; draw_info->last_indirect_draw is passed as the
- * global dependency. Returns the value returned by panfrost_add_job() for the
- * patch job.
- */
-unsigned
-GENX(panfrost_emit_indirect_draw)(struct pan_pool *pool,
-                                  struct pan_scoreboard *scoreboard,
-                                  const struct pan_indirect_draw_info *draw_info,
-                                  struct panfrost_ptr *ctx)
-{
-        struct panfrost_device *dev = pool->dev;
-
-        /* Currently only tested on Bifrost, but the logic should be the same
-         * on Midgard.
-         */
-        assert(pan_is_bifrost(dev));
-
-        panfrost_indirect_draw_alloc_deps(dev);
-
-        struct panfrost_ptr job =
-                pan_pool_alloc_desc(pool, COMPUTE_JOB);
-        mali_ptr rsd =
-                get_renderer_state(dev, draw_info->flags,
-                                   draw_info->index_size, false);
-
-        struct indirect_draw_context draw_ctx = {
-                .varying_mem = dev->indirect_draw_shaders.varying_heap->ptr.gpu,
-        };
-
-        struct panfrost_ptr draw_ctx_ptr = *ctx;
-        if (!draw_ctx_ptr.cpu) {
-                draw_ctx_ptr = pan_pool_alloc_aligned(pool,
-                                                      sizeof(draw_ctx),
-                                                      sizeof(mali_ptr));
-        }
-
-        struct indirect_draw_inputs inputs = {
-                .draw_ctx = draw_ctx_ptr.gpu,
-                .draw_buf = draw_info->draw_buf,
-                .index_buf = draw_info->index_buf,
-                .first_vertex_sysval = draw_info->first_vertex_sysval,
-                .base_vertex_sysval = draw_info->base_vertex_sysval,
-                .base_instance_sysval = draw_info->base_instance_sysval,
-                .vertex_job = draw_info->vertex_job,
-                .tiler_job = draw_info->tiler_job,
-                .attrib_bufs = draw_info->attrib_bufs,
-                .attribs = draw_info->attribs,
-                .varying_bufs = draw_info->varying_bufs,
-                .attrib_count = draw_info->attrib_count,
-        };
-
-        if (draw_info->index_size) {
-                inputs.restart_index = draw_info->restart_index;
-
-                struct panfrost_ptr min_max_ctx_ptr =
-                        pan_pool_alloc_aligned(pool,
-                                               sizeof(struct min_max_context),
-                                               4);
-
-                /* Named min_max_ctx rather than ctx so that it does not
-                 * shadow the ctx parameter, which is used again below.
-                 */
-                struct min_max_context *min_max_ctx = min_max_ctx_ptr.cpu;
-
-                /* Seed with the identity elements of umin/umax. */
-                min_max_ctx->min = UINT32_MAX;
-                min_max_ctx->max = 0;
-                inputs.min_max_ctx = min_max_ctx_ptr.gpu;
-        }
-
-        /* The patch shader runs as a single invocation. */
-        void *invocation =
-                pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);
-        panfrost_pack_work_groups_compute(invocation,
-                                          1, 1, 1, 1, 1, 1,
-                                          false, false);
-
-        pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
-                cfg.job_task_split = 2;
-        }
-
-        pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
-                cfg.state = rsd;
-                cfg.thread_storage = get_tls(pool->dev);
-                cfg.push_uniforms =
-                        pan_pool_upload_aligned(pool, &inputs, sizeof(inputs), 16);
-        }
-
-        unsigned global_dep = draw_info->last_indirect_draw;
-        unsigned local_dep =
-                panfrost_emit_index_min_max_search(pool, scoreboard, draw_info,
-                                                   &inputs, &draw_ctx);
-
-        /* First indirect draw using this context: hand the freshly allocated
-         * context back to the caller and initialize it.
-         */
-        if (!ctx->cpu) {
-                *ctx = draw_ctx_ptr;
-                memcpy(ctx->cpu, &draw_ctx, sizeof(draw_ctx));
-        }
-
-        return panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
-                                false, true, local_dep, global_dep,
-                                &job, false);
-}
-
-/* Set up the per-device indirect draw state that does not need GPU memory:
- * remember the binary pool and initialize the lock.
- */
-void
-GENX(panfrost_init_indirect_draw_shaders)(struct panfrost_device *dev,
-                                          struct pan_pool *bin_pool)
-{
-        /* The states and varying_heap BOs are deliberately not created
-         * here: they are allocated lazily so that no memory is reserved
-         * when indirect draws are never used.
-         */
-        dev->indirect_draw_shaders.bin_pool = bin_pool;
-        pthread_mutex_init(&dev->indirect_draw_shaders.lock, NULL);
-}
-
-/* Tear down the per-device indirect draw state: drop the references on the
- * states and varying heap BOs, then destroy the lock.
- */
-void
-GENX(panfrost_cleanup_indirect_draw_shaders)(struct panfrost_device *dev)
-{
-        pthread_mutex_t *lock = &dev->indirect_draw_shaders.lock;
-
-        panfrost_bo_unreference(dev->indirect_draw_shaders.states);
-        panfrost_bo_unreference(dev->indirect_draw_shaders.varying_heap);
-
-        pthread_mutex_destroy(lock);
-}
diff --git a/src/panfrost/lib/pan_indirect_draw.h b/src/panfrost/lib/pan_indirect_draw.h
deleted file mode 100644 (file)
index 6a77374..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2021 Collabora, Ltd.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __PAN_INDIRECT_DRAW_SHADERS_H__
-#define __PAN_INDIRECT_DRAW_SHADERS_H__
-
-#include "genxml/gen_macros.h"
-
-/* Forward declarations for every struct that is only used through a pointer
- * in the prototypes below, so that the tags are declared at file scope
- * instead of inside the parameter lists.
- */
-struct panfrost_device;
-struct panfrost_ptr;
-struct pan_scoreboard;
-struct pan_pool;
-
-/* Description of one indirect draw handed to
- * GENX(panfrost_emit_indirect_draw). All mali_ptr fields are GPU addresses.
- */
-struct pan_indirect_draw_info {
-        /* Buffer holding the indirect draw parameters */
-        mali_ptr draw_buf;
-        /* Index buffer; only scanned when index_size is non-zero */
-        mali_ptr index_buf;
-        /* Where to store the first_vertex/base_vertex/base_instance
-         * sysvals; a zero address skips the corresponding store.
-         */
-        mali_ptr first_vertex_sysval;
-        mali_ptr base_vertex_sysval;
-        mali_ptr base_instance_sysval;
-        /* Job descriptors to patch */
-        mali_ptr vertex_job;
-        mali_ptr tiler_job;
-        /* Attribute buffer, attribute and varying buffer descriptors to
-         * patch (presumably arrays of descriptors -- confirm), and the
-         * number of attributes.
-         */
-        mali_ptr attrib_bufs;
-        mali_ptr attribs;
-        mali_ptr varying_bufs;
-        unsigned attrib_count;
-        /* Primitive restart index; only forwarded for indexed draws */
-        uint32_t restart_index;
-        /* PAN_INDIRECT_DRAW_* flags selecting the shader variant */
-        unsigned flags;
-        /* Index size in bytes, 0 for non-indexed draws */
-        unsigned index_size;
-        /* Passed as the global dependency of the patch job */
-        unsigned last_indirect_draw;
-};
-
-/* Emit the compute job(s) patching the draw described by draw_info. *ctx is
- * an in/out draw context: if ctx->cpu is NULL a new one is allocated from
- * pool and returned through *ctx. Returns the value panfrost_add_job()
- * returned for the patch job.
- */
-unsigned
-GENX(panfrost_emit_indirect_draw)(struct pan_pool *pool,
-                                  struct pan_scoreboard *scoreboard,
-                                  const struct pan_indirect_draw_info *draw_info,
-                                  struct panfrost_ptr *ctx);
-
-/* Initialize the per-device indirect draw state; bin_pool is the pool the
- * shader binaries are later uploaded to.
- */
-void
-GENX(panfrost_init_indirect_draw_shaders)(struct panfrost_device *dev,
-                                          struct pan_pool *bin_pool);
-
-/* Release the per-device indirect draw state. */
-void
-GENX(panfrost_cleanup_indirect_draw_shaders)(struct panfrost_device *dev);
-
-#endif