freedreno/ir3: add local_group_size
author Rob Clark <robdclark@gmail.com>
Tue, 6 Mar 2018 13:30:41 +0000 (08:30 -0500)
committer Rob Clark <robdclark@gmail.com>
Sat, 31 Mar 2018 19:10:56 +0000 (15:10 -0400)
Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
src/gallium/drivers/freedreno/ir3/ir3_shader.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index a3e82ab..bca1a82 100644 (file)
@@ -2194,6 +2194,11 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
                        dst[i] = create_driver_param(ctx, IR3_DP_NUM_WORK_GROUPS_X + i);
                }
                break;
+       case nir_intrinsic_load_local_group_size:
+               for (int i = 0; i < intr->num_components; i++) {
+                       dst[i] = create_driver_param(ctx, IR3_DP_LOCAL_GROUP_SIZE_X + i);
+               }
+               break;
        case nir_intrinsic_discard_if:
        case nir_intrinsic_discard: {
                struct ir3_instruction *cond, *kill;
index 555c654..8ed7f56 100644 (file)
@@ -949,7 +949,9 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
                                [IR3_DP_NUM_WORK_GROUPS_X] = info->grid[0],
                                [IR3_DP_NUM_WORK_GROUPS_Y] = info->grid[1],
                                [IR3_DP_NUM_WORK_GROUPS_Z] = info->grid[2],
-                               /* do we need work-group-size? */
+                               [IR3_DP_LOCAL_GROUP_SIZE_X] = info->block[0],
+                               [IR3_DP_LOCAL_GROUP_SIZE_Y] = info->block[1],
+                               [IR3_DP_LOCAL_GROUP_SIZE_Z] = info->block[2],
                        };
 
                        ctx->emit_const(ring, SHADER_COMPUTE, offset * 4, 0,
index 040ea4c..272368c 100644 (file)
@@ -44,12 +44,15 @@ enum ir3_driver_param {
        IR3_DP_NUM_WORK_GROUPS_X = 0,
        IR3_DP_NUM_WORK_GROUPS_Y = 1,
        IR3_DP_NUM_WORK_GROUPS_Z = 2,
+       IR3_DP_LOCAL_GROUP_SIZE_X = 4,
+       IR3_DP_LOCAL_GROUP_SIZE_Y = 5,
+       IR3_DP_LOCAL_GROUP_SIZE_Z = 6,
        /* NOTE: gl_NumWorkGroups should be vec4 aligned because
         * glDispatchComputeIndirect() needs to load these from
         * the info->indirect buffer.  Keep that in mind when/if
         * adding any additional CS driver params.
         */
-       IR3_DP_CS_COUNT   = 4,   /* must be aligned to vec4 */
+       IR3_DP_CS_COUNT   = 8,   /* must be aligned to vec4 */
 
        /* vertex shader driver params: */
        IR3_DP_VTXID_BASE = 0,