zink: stop using pipe_stream_output
author Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Mon, 7 Aug 2023 14:44:13 +0000 (10:44 -0400)
committer Marge Bot <emma+marge@anholt.net>
Tue, 15 Aug 2023 11:54:06 +0000 (11:54 +0000)
nir_xfb_info is much cleaner and simpler to use

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24634>

src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
src/gallium/drivers/zink/zink_compiler.c
src/gallium/drivers/zink/zink_draw.cpp

index 2b7e572..f507e28 100644 (file)
@@ -900,8 +900,7 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var)
    if (var->data.patch)
       spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationPatch);
 
-   if (var->data.explicit_xfb_buffer &&
-       (!glsl_type_is_array(var->type) || glsl_array_size(var->type) == 1  || !glsl_type_is_interface(glsl_without_array(var->type)))) {
+   if (var->data.explicit_xfb_buffer && ctx->nir->xfb_info) {
       spirv_builder_emit_offset(&ctx->builder, var_id, var->data.offset);
       spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, var->data.xfb.buffer);
       spirv_builder_emit_xfb_stride(&ctx->builder, var_id, var->data.xfb.stride);
index 4cec0c7..ca914b3 100644 (file)
@@ -1666,10 +1666,11 @@ find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned locatio
 }
 
 static bool
-is_inlined(const bool *inlined, const struct pipe_stream_output *output)
+is_inlined(const bool *inlined, const nir_xfb_output_info *output)
 {
-   for (unsigned i = 0; i < output->num_components; i++)
-      if (!inlined[output->start_component + i])
+   unsigned num_components = util_bitcount(output->component_mask);
+   for (unsigned i = 0; i < num_components; i++)
+      if (!inlined[output->component_offset + i])
          return false;
    return true;
 }
@@ -1789,70 +1790,50 @@ get_var_slot_count(nir_shader *nir, nir_variable *var)
 }
 
 
-static const struct pipe_stream_output *
-find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
+static const nir_xfb_output_info *
+find_packed_output(const nir_xfb_info *xfb_info, unsigned slot)
 {
-   for (unsigned i = 0; i < so_info->num_outputs; i++) {
-      const struct pipe_stream_output *packed_output = &so_info->output[i];
-      if (reverse_map[packed_output->register_index] == slot)
+   for (unsigned i = 0; i < xfb_info->output_count; i++) {
+      const nir_xfb_output_info *packed_output = &xfb_info->outputs[i];
+      if (packed_output->location == slot)
          return packed_output;
    }
    return NULL;
 }
 
 static void
-update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream_output_info *so_info,
-               uint64_t outputs_written, bool have_psiz)
-{
-   uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
-   unsigned slot = 0;
-   /* semi-copied from iris */
-   while (outputs_written) {
-      int bit = u_bit_scan64(&outputs_written);
-      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
-      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
-         continue;
-      reverse_map[slot++] = bit;
-   }
-
-   bool have_fake_psiz = false;
-   nir_foreach_shader_out_variable(var, nir) {
-      if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
-         have_fake_psiz = true;
-   }
-
+update_so_info(struct zink_shader *zs, nir_shader *nir, uint64_t outputs_written, bool have_psiz)
+{
    bool inlined[VARYING_SLOT_MAX][4] = {0};
    uint64_t packed = 0;
    uint8_t packed_components[VARYING_SLOT_MAX] = {0};
    uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
    uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
    uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
-   nir_variable *psiz = NULL;
-   for (unsigned i = 0; i < so_info->num_outputs; i++) {
-      const struct pipe_stream_output *output = &so_info->output[i];
+   for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
+      const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
+      unsigned xfb_components = util_bitcount(output->component_mask);
       /* always set stride to be used during draw */
-      zs->sinfo.stride[output->output_buffer] = so_info->stride[output->output_buffer];
+      zs->sinfo.stride[output->buffer] = nir->xfb_info->buffers[output->buffer].stride;
       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
-         for (unsigned c = 0; !is_inlined(inlined[reverse_map[output->register_index]], output) && c < output->num_components; c++) {
-            unsigned slot = reverse_map[output->register_index];
-            if (inlined[slot][output->start_component + c])
+         for (unsigned c = 0; !is_inlined(inlined[output->location], output) && c < xfb_components; c++) {
+            unsigned slot = output->location;
+            if (inlined[slot][output->component_offset + c])
                continue;
             nir_variable *var = NULL;
             while (!var && slot < VARYING_SLOT_TESS_MAX)
-               var = find_var_with_location_frac(nir, slot--, output->start_component + c, have_psiz, nir_var_shader_out);
-            slot = reverse_map[output->register_index];
+               var = find_var_with_location_frac(nir, slot--, output->component_offset + c, have_psiz, nir_var_shader_out);
+            slot = output->location;
             unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
             if (!var || var->data.location > slot || var->data.location + slot_count <= slot) {
                /* if no variable is found for the xfb output, no output exists */
-               inlined[slot][c + output->start_component] = true;
+               inlined[slot][c + output->component_offset] = true;
                continue;
             }
-            if (var->data.location == VARYING_SLOT_PSIZ)
-               psiz = var;
             if (var->data.explicit_xfb_buffer) {
                /* handle dvec3 where gallium splits streamout over 2 registers */
-               for (unsigned j = 0; j < output->num_components; j++)
-                  inlined[slot][c + output->start_component + j] = true;
+               for (unsigned j = 0; j < xfb_components; j++)
+                  inlined[slot][c + output->component_offset + j] = true;
             }
             if (is_inlined(inlined[slot], output))
                continue;
@@ -1864,24 +1845,24 @@ update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream
             /* if this is the entire variable, try to blast it out during the initial declaration
             * structs must be handled later to ensure accurate analysis
             */
-            if ((num_components == output->num_components ||
-                 num_components < output->num_components ||
-                 (num_components > output->num_components && output->num_components == 4))) {
+            if ((num_components == xfb_components ||
+                 num_components < xfb_components ||
+                 (num_components > xfb_components && xfb_components == 4))) {
                var->data.explicit_xfb_buffer = 1;
-               var->data.xfb.buffer = output->output_buffer;
-               var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
-               var->data.offset = (c + output->dst_offset) * 4;
-               var->data.stream = output->stream;
-               for (unsigned j = 0; j < MIN2(num_components, output->num_components); j++)
-                  inlined[slot][c + output->start_component + j] = true;
+               var->data.xfb.buffer = output->buffer;
+               var->data.xfb.stride = zs->sinfo.stride[output->buffer];
+               var->data.offset = (output->offset + c * sizeof(uint32_t));
+               var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
+               for (unsigned j = 0; j < MIN2(num_components, xfb_components); j++)
+                  inlined[slot][c + output->component_offset + j] = true;
             } else {
                /* otherwise store some metadata for later */
                packed |= BITFIELD64_BIT(slot);
-               packed_components[slot] += output->num_components;
-               packed_streams[slot] |= BITFIELD_BIT(output->stream);
-               packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
-               for (unsigned j = 0; j < output->num_components; j++)
-                  packed_offsets[output->register_index][j + output->start_component + c] = output->dst_offset + j;
+               packed_components[slot] += xfb_components;
+               packed_streams[slot] |= BITFIELD_BIT(nir->xfb_info->buffer_to_stream[output->buffer]);
+               packed_buffers[slot] |= BITFIELD_BIT(output->buffer);
+               for (unsigned j = 0; j < xfb_components; j++)
+                  packed_offsets[output->location][j + output->component_offset + c] = output->offset + j * sizeof(uint32_t);
             }
          }
       }
@@ -1891,16 +1872,16 @@ update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream
     * being output with the same stream on the same buffer with increasing offsets, this entire variable
     * can be consolidated into a single output to conserve locations
     */
-   for (unsigned i = 0; i < so_info->num_outputs; i++) {
-      const struct pipe_stream_output *output = &so_info->output[i];
-      unsigned slot = reverse_map[output->register_index];
+   for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
+      const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
+      unsigned slot = output->location;
       if (is_inlined(inlined[slot], output))
          continue;
       if (zs->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->info.gs.active_stream_mask) == 1) {
          nir_variable *var = NULL;
          while (!var)
-            var = find_var_with_location_frac(nir, slot--, output->start_component, have_psiz, nir_var_shader_out);
-         slot = reverse_map[output->register_index];
+            var = find_var_with_location_frac(nir, slot--, output->component_offset, have_psiz, nir_var_shader_out);
+         slot = output->location;
          unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
          if (!var || var->data.location > slot || var->data.location + slot_count <= slot)
             continue;
@@ -1914,7 +1895,7 @@ update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream
          /* for each variable, iterate over all the variable's slots and inline the outputs */
          for (unsigned j = 0; j < num_slots; j++) {
             slot = var->data.location + j;
-            const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
+            const nir_xfb_output_info *packed_output = find_packed_output(nir->xfb_info, slot);
             if (!packed_output)
                goto out;
 
@@ -1930,20 +1911,20 @@ update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream
                goto out;
 
             /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
-            uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
+            uint32_t prev_offset = packed_offsets[packed_output->location][0];
             for (unsigned k = 1; k < num_components; k++) {
                /* if the offsets are not incrementing as expected, skip consolidation */
-               if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
+               if (packed_offsets[packed_output->location][k] != prev_offset + sizeof(uint32_t))
                   goto out;
-               prev_offset = packed_offsets[packed_output->register_index][k + packed_output->start_component];
+               prev_offset = packed_offsets[packed_output->location][k + packed_output->component_offset];
             }
          }
          /* this output can be consolidated: blast out all the data inlined */
          var->data.explicit_xfb_buffer = 1;
-         var->data.xfb.buffer = output->output_buffer;
-         var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
-         var->data.offset = output->dst_offset * 4;
-         var->data.stream = output->stream;
+         var->data.xfb.buffer = output->buffer;
+         var->data.xfb.stride = zs->sinfo.stride[output->buffer];
+         var->data.offset = output->offset;
+         var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
          /* mark all slot components inlined to skip subsequent loop iterations */
          for (unsigned j = 0; j < num_slots; j++) {
             slot = var->data.location + j;
@@ -1956,9 +1937,6 @@ update_so_info(struct zink_shader *zs, nir_shader *nir, const struct pipe_stream
 out:
       unreachable("xfb should be inlined by now!");
    }
-   /* ensure this doesn't get output in the shader by unsetting location */
-   if (have_fake_psiz && psiz)
-      update_psiz_location(nir, psiz);
 }
 
 struct decompose_state {
@@ -3712,6 +3690,8 @@ zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shad
             if (zink_vs_key_base(key)->push_drawid) {
                NIR_PASS_V(nir, lower_drawid);
             }
+         } else {
+            nir->xfb_info = NULL;
          }
          if (zink_vs_key_base(key)->robust_access)
             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
@@ -5424,7 +5404,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
       nir_foreach_shader_out_variable(var, nir)
          var->data.explicit_xfb_buffer = 0;
    if (so_info && so_info->num_outputs && nir->info.outputs_written)
-      update_so_info(ret, nir, so_info, nir->info.outputs_written, have_psiz);
+      update_so_info(ret, nir, nir->info.outputs_written, have_psiz);
    else if (have_psiz) {
       bool have_fake_psiz = false;
       nir_variable *psiz = NULL;
index c436e35..498d2fd 100644 (file)
@@ -991,7 +991,7 @@ zink_draw(struct pipe_context *pctx,
          counter_buffers[i] = VK_NULL_HANDLE;
          if (t) {
             struct zink_resource *res = zink_resource(t->counter_buffer);
-            t->stride = ctx->last_vertex_stage->sinfo.stride[i] * sizeof(uint32_t);
+            t->stride = ctx->last_vertex_stage->sinfo.stride[i];
             zink_batch_reference_resource_rw(batch, res, true);
             if (!ctx->unordered_blitting)
                res->obj->unordered_read = res->obj->unordered_write = false;