From 55e014336fa69545b71f15c627bb29a7d7c39f7e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 5 Mar 2020 15:17:31 +0100 Subject: [PATCH] panfrost: Prepare things to get rid of panfrost_shader_state.tripipe panfrost_shader_state.tripipe is used as a template for shader_meta desc emission, but shader_meta desc preparation time should be negligible compared to desc emission time (remember we are writing to non-cacheable memory here). Let's prepare for generating the shader_meta desc entirely at draw time by adding the necessary fields to panfrost_shader_state. Note that we might bring back some sort of shader_meta desc caching at some point, but let's simplify things a bit for now. Signed-off-by: Boris Brezillon Reviewed-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_assemble.c | 24 +++++++++++++++--------- src/gallium/drivers/panfrost/pan_context.h | 6 ++++++ src/gallium/drivers/panfrost/pan_varyings.c | 24 ++++++++++++------------ 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c index 1ef81c5..2ceb5ce 100644 --- a/src/gallium/drivers/panfrost/pan_assemble.c +++ b/src/gallium/drivers/panfrost/pan_assemble.c @@ -81,9 +81,11 @@ panfrost_shader_compile( state->bo = panfrost_bo_create(screen, size, PAN_BO_EXECUTE); memcpy(state->bo->cpu, dst, size); meta->shader = state->bo->gpu | program.first_tag; + state->first_tag = program.first_tag; } else { /* No shader. 
Use dummy tag to avoid INSTR_INVALID_ENC */ meta->shader = 0x0 | 1; + state->first_tag = 1; } util_dynarray_fini(&program.compiled); @@ -101,19 +103,19 @@ panfrost_shader_compile( switch (stage) { case MESA_SHADER_VERTEX: - meta->attribute_count = util_bitcount64(s->info.inputs_read); - meta->varying_count = util_bitcount64(s->info.outputs_written); + state->attribute_count = util_bitcount64(s->info.inputs_read); + state->varying_count = util_bitcount64(s->info.outputs_written); if (vertex_id) - meta->attribute_count = MAX2(meta->attribute_count, PAN_VERTEX_ID + 1); + state->attribute_count = MAX2(state->attribute_count, PAN_VERTEX_ID + 1); if (instance_id) - meta->attribute_count = MAX2(meta->attribute_count, PAN_INSTANCE_ID + 1); + state->attribute_count = MAX2(state->attribute_count, PAN_INSTANCE_ID + 1); break; case MESA_SHADER_FRAGMENT: - meta->attribute_count = 0; - meta->varying_count = util_bitcount64(s->info.inputs_read); + state->attribute_count = 0; + state->varying_count = util_bitcount64(s->info.inputs_read); if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) state->writes_depth = true; if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) @@ -121,8 +123,8 @@ panfrost_shader_compile( break; case MESA_SHADER_COMPUTE: /* TODO: images */ - meta->attribute_count = 0; - meta->varying_count = 0; + state->attribute_count = 0; + state->varying_count = 0; state->shared_size = s->info.cs.shared_size; break; default: @@ -140,7 +142,11 @@ panfrost_shader_compile( /* Separate as primary uniform count is truncated */ state->uniform_count = program.uniform_count; + state->uniform_cutoff = program.uniform_cutoff; + state->work_reg_count = program.work_register_count; + meta->attribute_count = state->attribute_count; + meta->varying_count = state->varying_count; meta->midgard1.flags_hi = 8; /* XXX */ unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1); @@ -148,7 +154,7 @@ panfrost_shader_compile( unsigned default_vec4_swizzle = 
panfrost_get_default_swizzle(4); /* Iterate the varyings and emit the corresponding descriptor */ - for (unsigned i = 0; i < meta->varying_count; ++i) { + for (unsigned i = 0; i < state->varying_count; ++i) { unsigned location = program.varyings[i]; /* Default to a vec4 varying */ diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index b6eb720..2e0c445 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -192,6 +192,9 @@ struct panfrost_shader_state { /* Non-descript information */ int uniform_count; + unsigned uniform_cutoff; + unsigned work_reg_count; + unsigned attribute_count; bool can_discard; bool writes_point_size; bool writes_depth; @@ -202,6 +205,8 @@ struct panfrost_shader_state { unsigned stack_size; unsigned shared_size; + + unsigned int varying_count; struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS]; gl_varying_slot varyings_loc[PIPE_MAX_ATTRIBS]; struct pipe_stream_output_info stream_output; @@ -219,6 +224,7 @@ struct panfrost_shader_state { /* Should we enable helper invocations */ bool helper_invocations; + unsigned first_tag; struct panfrost_bo *bo; }; diff --git a/src/gallium/drivers/panfrost/pan_varyings.c b/src/gallium/drivers/panfrost/pan_varyings.c index 9945944..365237e 100644 --- a/src/gallium/drivers/panfrost/pan_varyings.c +++ b/src/gallium/drivers/panfrost/pan_varyings.c @@ -108,7 +108,7 @@ panfrost_emit_varying_meta( { struct mali_attr_meta *out = (struct mali_attr_meta *) outptr; - for (unsigned i = 0; i < ss->tripipe->varying_count; ++i) { + for (unsigned i = 0; i < ss->varying_count; ++i) { gl_varying_slot location = ss->varyings_loc[i]; int index = -1; @@ -186,8 +186,8 @@ panfrost_emit_varying_descriptor( /* Allocate the varying descriptor */ - size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count; - size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count; + size_t vs_size = sizeof(struct 
mali_attr_meta) * vs->varying_count; + size_t fs_size = sizeof(struct mali_attr_meta) * fs->varying_count; struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx); struct panfrost_transfer trans = panfrost_allocate_transient(batch, @@ -200,7 +200,7 @@ panfrost_emit_varying_descriptor( * not, use the provided stream out information to determine the * offset, since it was already linked for us. */ - for (unsigned i = 0; i < vs->tripipe->varying_count; i++) { + for (unsigned i = 0; i < vs->varying_count; i++) { gl_varying_slot loc = vs->varyings_loc[i]; bool special = is_special_varying(loc); @@ -222,12 +222,12 @@ panfrost_emit_varying_descriptor( /* Link up with fragment varyings */ bool reads_point_coord = fs->reads_point_coord; - for (unsigned i = 0; i < fs->tripipe->varying_count; i++) { + for (unsigned i = 0; i < fs->varying_count; i++) { gl_varying_slot loc = fs->varyings_loc[i]; signed vs_idx = -1; /* Link up */ - for (unsigned j = 0; j < vs->tripipe->varying_count; ++j) { + for (unsigned j = 0; j < vs->varying_count; ++j) { if (vs->varyings_loc[j] == loc) { vs_idx = j; break; @@ -252,7 +252,7 @@ panfrost_emit_varying_descriptor( /* Figure out how many streamout buffers could be bound */ unsigned so_count = ctx->streamout.num_targets; - for (unsigned i = 0; i < vs->tripipe->varying_count; i++) { + for (unsigned i = 0; i < vs->varying_count; i++) { gl_varying_slot loc = vs->varyings_loc[i]; bool captured = ((vs->so_mask & (1ll << loc)) ? true : false); @@ -331,7 +331,7 @@ panfrost_emit_varying_descriptor( struct mali_attr_meta *ovs = (struct mali_attr_meta *) (trans.cpu); struct mali_attr_meta *ofs = (struct mali_attr_meta *) (trans.cpu + vs_size); - for (unsigned i = 0; i < vs->tripipe->varying_count; i++) { + for (unsigned i = 0; i < vs->varying_count; i++) { gl_varying_slot loc = vs->varyings_loc[i]; bool captured = ((vs->so_mask & (1ll << loc)) ? 
true : false); @@ -349,7 +349,7 @@ panfrost_emit_varying_descriptor( signed fs_idx = -1; /* Link up */ - for (unsigned j = 0; j < fs->tripipe->varying_count; ++j) { + for (unsigned j = 0; j < fs->varying_count; ++j) { if (fs->varyings_loc[j] == loc) { fs_idx = j; break; @@ -364,7 +364,7 @@ panfrost_emit_varying_descriptor( } /* Replace point sprite */ - for (unsigned i = 0; i < fs->tripipe->varying_count; i++) { + for (unsigned i = 0; i < fs->varying_count; i++) { /* If we have a point sprite replacement, handle that here. We * have to translate location first. TODO: Flip y in shader. * We're already keying ... just time crunch .. */ @@ -398,12 +398,12 @@ panfrost_emit_varying_descriptor( varyings[i].elements |= MALI_ATTR_LINEAR; varyings[i].size += align; - for (unsigned v = 0; v < vs->tripipe->varying_count; ++v) { + for (unsigned v = 0; v < vs->varying_count; ++v) { if (ovs[v].index == i) ovs[v].src_offset = vs->varyings[v].src_offset + align; } - for (unsigned f = 0; f < fs->tripipe->varying_count; ++f) { + for (unsigned f = 0; f < fs->varying_count; ++f) { if (ofs[f].index == i) ofs[f].src_offset = fs->varyings[f].src_offset + align; } -- 2.7.4