panfrost: Prepare things to get rid of panfrost_shader_state.tripipe
author Boris Brezillon <boris.brezillon@collabora.com>
Thu, 5 Mar 2020 14:17:31 +0000 (15:17 +0100)
committer Boris Brezillon <boris.brezillon@collabora.com>
Tue, 10 Mar 2020 11:47:34 +0000 (12:47 +0100)
panfrost_shader_state.tripipe is used as a template for shader_meta
desc emission, but shader_meta desc preparation time should be negligible
compared to desc emission time (remember we are writing to non-cacheable
memory here). Let's prepare for generating the shader_meta desc
entirely at draw time by adding the necessary fields to
panfrost_shader_state.

Note that we might bring back some sort of shader_meta desc caching at
some point, but let's simplify things a bit for now.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4083>

src/gallium/drivers/panfrost/pan_assemble.c
src/gallium/drivers/panfrost/pan_context.h
src/gallium/drivers/panfrost/pan_varyings.c

index 1ef81c5..2ceb5ce 100644 (file)
@@ -81,9 +81,11 @@ panfrost_shader_compile(
                 state->bo = panfrost_bo_create(screen, size, PAN_BO_EXECUTE);
                 memcpy(state->bo->cpu, dst, size);
                 meta->shader = state->bo->gpu | program.first_tag;
+                state->first_tag = program.first_tag;
         } else {
                 /* No shader. Use dummy tag to avoid INSTR_INVALID_ENC */
                 meta->shader = 0x0 | 1;
+                state->first_tag = 1;
         }
 
         util_dynarray_fini(&program.compiled);
@@ -101,19 +103,19 @@ panfrost_shader_compile(
 
         switch (stage) {
         case MESA_SHADER_VERTEX:
-                meta->attribute_count = util_bitcount64(s->info.inputs_read);
-                meta->varying_count = util_bitcount64(s->info.outputs_written);
+                state->attribute_count = util_bitcount64(s->info.inputs_read);
+                state->varying_count = util_bitcount64(s->info.outputs_written);
 
                 if (vertex_id)
-                        meta->attribute_count = MAX2(meta->attribute_count, PAN_VERTEX_ID + 1);
+                        state->attribute_count = MAX2(state->attribute_count, PAN_VERTEX_ID + 1);
 
                 if (instance_id)
-                        meta->attribute_count = MAX2(meta->attribute_count, PAN_INSTANCE_ID + 1);
+                        state->attribute_count = MAX2(state->attribute_count, PAN_INSTANCE_ID + 1);
 
                 break;
         case MESA_SHADER_FRAGMENT:
-                meta->attribute_count = 0;
-                meta->varying_count = util_bitcount64(s->info.inputs_read);
+                state->attribute_count = 0;
+                state->varying_count = util_bitcount64(s->info.inputs_read);
                 if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
                         state->writes_depth = true;
                 if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
@@ -121,8 +123,8 @@ panfrost_shader_compile(
                 break;
         case MESA_SHADER_COMPUTE:
                 /* TODO: images */
-                meta->attribute_count = 0;
-                meta->varying_count = 0;
+                state->attribute_count = 0;
+                state->varying_count = 0;
                 state->shared_size = s->info.cs.shared_size;
                 break;
         default:
@@ -140,7 +142,11 @@ panfrost_shader_compile(
 
         /* Separate as primary uniform count is truncated */
         state->uniform_count = program.uniform_count;
+        state->uniform_cutoff = program.uniform_cutoff;
+        state->work_reg_count = program.work_register_count;
 
+        meta->attribute_count = state->attribute_count;
+        meta->varying_count = state->varying_count;
         meta->midgard1.flags_hi = 8; /* XXX */
 
         unsigned default_vec1_swizzle = panfrost_get_default_swizzle(1);
@@ -148,7 +154,7 @@ panfrost_shader_compile(
         unsigned default_vec4_swizzle = panfrost_get_default_swizzle(4);
 
         /* Iterate the varyings and emit the corresponding descriptor */
-        for (unsigned i = 0; i < meta->varying_count; ++i) {
+        for (unsigned i = 0; i < state->varying_count; ++i) {
                 unsigned location = program.varyings[i];
 
                 /* Default to a vec4 varying */
index b6eb720..2e0c445 100644 (file)
@@ -192,6 +192,9 @@ struct panfrost_shader_state {
 
         /* Non-descript information */
         int uniform_count;
+        unsigned uniform_cutoff;
+        unsigned work_reg_count;
+        unsigned attribute_count;
         bool can_discard;
         bool writes_point_size;
         bool writes_depth;
@@ -202,6 +205,8 @@ struct panfrost_shader_state {
         unsigned stack_size;
         unsigned shared_size;
 
+
+        unsigned int varying_count;
         struct mali_attr_meta varyings[PIPE_MAX_ATTRIBS];
         gl_varying_slot varyings_loc[PIPE_MAX_ATTRIBS];
         struct pipe_stream_output_info stream_output;
@@ -219,6 +224,7 @@ struct panfrost_shader_state {
         /* Should we enable helper invocations */
         bool helper_invocations;
 
+        unsigned first_tag;
         struct panfrost_bo *bo;
 };
 
index 9945944..365237e 100644 (file)
@@ -108,7 +108,7 @@ panfrost_emit_varying_meta(
 {
         struct mali_attr_meta *out = (struct mali_attr_meta *) outptr;
 
-        for (unsigned i = 0; i < ss->tripipe->varying_count; ++i) {
+        for (unsigned i = 0; i < ss->varying_count; ++i) {
                 gl_varying_slot location = ss->varyings_loc[i];
                 int index = -1;
 
@@ -186,8 +186,8 @@ panfrost_emit_varying_descriptor(
 
         /* Allocate the varying descriptor */
 
-        size_t vs_size = sizeof(struct mali_attr_meta) * vs->tripipe->varying_count;
-        size_t fs_size = sizeof(struct mali_attr_meta) * fs->tripipe->varying_count;
+        size_t vs_size = sizeof(struct mali_attr_meta) * vs->varying_count;
+        size_t fs_size = sizeof(struct mali_attr_meta) * fs->varying_count;
 
         struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
         struct panfrost_transfer trans = panfrost_allocate_transient(batch,
@@ -200,7 +200,7 @@ panfrost_emit_varying_descriptor(
          * not, use the provided stream out information to determine the
          * offset, since it was already linked for us. */
 
-        for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
+        for (unsigned i = 0; i < vs->varying_count; i++) {
                 gl_varying_slot loc = vs->varyings_loc[i];
 
                 bool special = is_special_varying(loc);
@@ -222,12 +222,12 @@ panfrost_emit_varying_descriptor(
         /* Link up with fragment varyings */
         bool reads_point_coord = fs->reads_point_coord;
 
-        for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
+        for (unsigned i = 0; i < fs->varying_count; i++) {
                 gl_varying_slot loc = fs->varyings_loc[i];
                 signed vs_idx = -1;
 
                 /* Link up */
-                for (unsigned j = 0; j < vs->tripipe->varying_count; ++j) {
+                for (unsigned j = 0; j < vs->varying_count; ++j) {
                         if (vs->varyings_loc[j] == loc) {
                                 vs_idx = j;
                                 break;
@@ -252,7 +252,7 @@ panfrost_emit_varying_descriptor(
 
         /* Figure out how many streamout buffers could be bound */
         unsigned so_count = ctx->streamout.num_targets;
-        for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
+        for (unsigned i = 0; i < vs->varying_count; i++) {
                 gl_varying_slot loc = vs->varyings_loc[i];
 
                 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
@@ -331,7 +331,7 @@ panfrost_emit_varying_descriptor(
         struct mali_attr_meta *ovs = (struct mali_attr_meta *) (trans.cpu);
         struct mali_attr_meta *ofs = (struct mali_attr_meta *) (trans.cpu + vs_size);
 
-        for (unsigned i = 0; i < vs->tripipe->varying_count; i++) {
+        for (unsigned i = 0; i < vs->varying_count; i++) {
                 gl_varying_slot loc = vs->varyings_loc[i];
 
                 bool captured = ((vs->so_mask & (1ll << loc)) ? true : false);
@@ -349,7 +349,7 @@ panfrost_emit_varying_descriptor(
                 signed fs_idx = -1;
 
                 /* Link up */
-                for (unsigned j = 0; j < fs->tripipe->varying_count; ++j) {
+                for (unsigned j = 0; j < fs->varying_count; ++j) {
                         if (fs->varyings_loc[j] == loc) {
                                 fs_idx = j;
                                 break;
@@ -364,7 +364,7 @@ panfrost_emit_varying_descriptor(
         }
 
         /* Replace point sprite */
-        for (unsigned i = 0; i < fs->tripipe->varying_count; i++) {
+        for (unsigned i = 0; i < fs->varying_count; i++) {
                 /* If we have a point sprite replacement, handle that here. We
                  * have to translate location first.  TODO: Flip y in shader.
                  * We're already keying ... just time crunch .. */
@@ -398,12 +398,12 @@ panfrost_emit_varying_descriptor(
                 varyings[i].elements |= MALI_ATTR_LINEAR;
                 varyings[i].size += align;
 
-                for (unsigned v = 0; v < vs->tripipe->varying_count; ++v) {
+                for (unsigned v = 0; v < vs->varying_count; ++v) {
                         if (ovs[v].index == i)
                                 ovs[v].src_offset = vs->varyings[v].src_offset + align;
                 }
 
-                for (unsigned f = 0; f < fs->tripipe->varying_count; ++f) {
+                for (unsigned f = 0; f < fs->varying_count; ++f) {
                         if (ofs[f].index == i)
                                 ofs[f].src_offset = fs->varyings[f].src_offset + align;
                 }