freedreno: Add local_size to ir3_shader_variant
author Connor Abbott <cwabbott0@gmail.com>
Wed, 10 Mar 2021 14:01:33 +0000 (15:01 +0100)
committer Marge Bot <eric+marge@anholt.net>
Mon, 22 Mar 2021 18:03:16 +0000 (18:03 +0000)
We want to use the local_size when available to calculate the threadsize
in ir3, and we need it to work with e.g. computerator where we don't
have a nir shader. Add a local_size field and use that in computerator
instead of a separate structure that's inaccessible to core ir3.

Also set a dummy local_size in the tests to avoid a divide-by-zero.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9498>

src/freedreno/computerator/ir3_asm.c
src/freedreno/ir3/ir3_assembler.c
src/freedreno/ir3/ir3_assembler.h
src/freedreno/ir3/ir3_compiler_nir.c
src/freedreno/ir3/ir3_parser.y
src/freedreno/ir3/ir3_shader.h
src/freedreno/vulkan/tu_pipeline.c

index 405bd17..9c5e8d8 100644 (file)
@@ -40,7 +40,9 @@ ir3_asm_assemble(struct ir3_compiler *c, FILE *in)
        kernel->v = v;
        kernel->bin = v->bin;
 
-       memcpy(kernel->base.local_size, kernel->info.local_size, sizeof(kernel->base.local_size));
+       kernel->base.local_size[0] = v->local_size[0];
+       kernel->base.local_size[1] = v->local_size[1];
+       kernel->base.local_size[2] = v->local_size[2];
        kernel->base.num_bufs = kernel->info.num_bufs;
        memcpy(kernel->base.buf_sizes, kernel->info.buf_sizes, sizeof(kernel->base.buf_sizes));
 
index 94da3c4..fda3ea4 100644 (file)
@@ -47,6 +47,11 @@ ir3_parse_asm(struct ir3_compiler *c, struct ir3_kernel_info *info, FILE *in)
 
        info->numwg = INVALID_REG;
 
+       /* Provide a default local_size in case the shader doesn't set it, so that
+        * we don't crash at least.
+        */
+       v->local_size[0] = v->local_size[1] = v->local_size[2] = 1;
+
        v->ir = ir3_parse(v, info, in);
        if (!v->ir)
                goto error;
index 93f40ae..e1b3e65 100644 (file)
@@ -30,7 +30,6 @@
 #define MAX_BUFS 4
 
 struct ir3_kernel_info {
-       uint32_t local_size[3];
        uint32_t num_bufs;
        uint32_t buf_sizes[MAX_BUFS]; /* size in dwords */
 
index 62be47c..8cb00c3 100644 (file)
@@ -3903,6 +3903,13 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
                        ctx->s->info.fs.needs_quad_helper_invocations)
                so->need_pixlod = true;
 
+       if (so->type == MESA_SHADER_COMPUTE) {
+               so->local_size[0] = ctx->s->info.cs.local_size[0];
+               so->local_size[1] = ctx->s->info.cs.local_size[1];
+               so->local_size[2] = ctx->s->info.cs.local_size[2];
+               so->local_size_variable = ctx->s->info.cs.local_size_variable;
+       }
+
 out:
        if (ret) {
                if (so->ir)
index 784e289..ce019c5 100644 (file)
@@ -641,9 +641,9 @@ const_val:         T_FLOAT   { $$ = fui($1); }
 |                  T_HEX     { $$ = $1;      }
 
 localsize_header:  T_A_LOCALSIZE const_val ',' const_val ',' const_val {
-                       info->local_size[0] = $2;
-                       info->local_size[1] = $4;
-                       info->local_size[2] = $6;
+                       variant->local_size[0] = $2;
+                       variant->local_size[1] = $4;
+                       variant->local_size[2] = $6;
 }
 
 const_header:      T_A_CONST '(' T_CONSTANT ')' const_val ',' const_val ',' const_val ',' const_val {
index 814c0a5..3d5abc9 100644 (file)
@@ -660,6 +660,9 @@ struct ir3_shader_variant {
        /* texture sampler pre-dispatches */
        uint32_t num_sampler_prefetch;
        struct ir3_sampler_prefetch sampler_prefetch[IR3_MAX_SAMPLER_PREFETCH];
+
+       uint16_t local_size[3];
+       bool local_size_variable;
 };
 
 static inline const char *
index 510044a..40ed460 100644 (file)
@@ -3050,7 +3050,7 @@ tu_compute_pipeline_create(VkDevice device,
    tu_setup_pvtmem(dev, pipeline, &pvtmem, v->pvtmem_size, v->pvtmem_per_wave);
 
    for (int i = 0; i < 3; i++)
-      pipeline->compute.local_size[i] = v->shader->nir->info.cs.local_size[i];
+      pipeline->compute.local_size[i] = v->local_size[i];
 
    struct tu_cs prog_cs;
    tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);