panvk: Fix per-instance attribute handling
author Boris Brezillon <boris.brezillon@collabora.com>
Wed, 29 Sep 2021 11:21:21 +0000 (13:21 +0200)
committer Jason Ekstrand <jason.ekstrand@collabora.com>
Wed, 16 Mar 2022 14:57:51 +0000 (09:57 -0500)
We were assuming per-vertex attributes so far. Let's extend the logic
to support per-instance attributes with or without custom instance
divisors.  Now that we've got it all hooked up, we can enable
VK_EXT_vertex_attribute_divisor.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15295>

src/panfrost/vulkan/panvk_private.h
src/panfrost/vulkan/panvk_vX_cmd_buffer.c
src/panfrost/vulkan/panvk_vX_cs.c
src/panfrost/vulkan/panvk_vX_cs.h
src/panfrost/vulkan/panvk_vX_pipeline.c

index 8f08644..74686d0 100644 (file)
@@ -580,6 +580,7 @@ struct panvk_attrib_buf_info {
       struct {
          unsigned stride;
          bool per_instance;
+         uint32_t instance_divisor;
       };
       unsigned special_id;
    };
index f75030c..a5a14cb 100644 (file)
@@ -781,7 +781,7 @@ panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                                     cmdbuf->state.vb.bufs,
                                     cmdbuf->state.vb.count,
                                     draw, bufs.cpu);
-   panvk_per_arch(emit_attribs)(cmdbuf->device, &pipeline->attribs,
+   panvk_per_arch(emit_attribs)(cmdbuf->device, draw, &pipeline->attribs,
                                 cmdbuf->state.vb.bufs, cmdbuf->state.vb.count,
                                 attribs.cpu);
 
index d537510..3bc37ac 100644 (file)
@@ -232,23 +232,63 @@ panvk_emit_attrib_buf(const struct panvk_attribs_info *info,
 
    assert(idx < buf_count);
    const struct panvk_attrib_buf *buf = &bufs[idx];
-   unsigned divisor = buf_info->per_instance ?
-                      draw->padded_vertex_count : 0;
-   unsigned stride = divisor && draw->instance_count == 1 ?
-                     0 : buf_info->stride;
    mali_ptr addr = buf->address & ~63ULL;
    unsigned size = buf->size + (buf->address & 63);
+   unsigned divisor =
+      draw->padded_vertex_count * buf_info->instance_divisor;
 
    /* TODO: support instanced arrays */
-   pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
-      if (draw->instance_count > 1 && divisor) {
+   if (draw->instance_count <= 1) {
+      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
+         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
+         cfg.stride = buf_info->per_instance ? 0 : buf_info->stride;
+         cfg.pointer = addr;
+         cfg.size = size;
+      }
+   } else if (!buf_info->per_instance) {
+      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
          cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
-         cfg.divisor = divisor;
+         cfg.divisor = draw->padded_vertex_count;
+         cfg.stride = buf_info->stride;
+         cfg.pointer = addr;
+         cfg.size = size;
+      }
+   } else if (!divisor) {
+      /* instance_divisor == 0 means all instances share the same value.
+       * Make it a 1D array with a zero stride.
+       */
+      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
+         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
+         cfg.stride = 0;
+         cfg.pointer = addr;
+         cfg.size = size;
+      }
+   } else if (util_is_power_of_two_or_zero(divisor)) {
+      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
+         cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
+         cfg.stride = buf_info->stride;
+         cfg.pointer = addr;
+         cfg.size = size;
+         cfg.divisor_r = __builtin_ctz(divisor);
+      }
+   } else {
+      unsigned divisor_r = 0, divisor_e = 0;
+      unsigned divisor_num =
+         panfrost_compute_magic_divisor(divisor, &divisor_r, &divisor_e);
+      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
+         cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
+         cfg.stride = buf_info->stride;
+         cfg.pointer = addr;
+         cfg.size = size;
+         cfg.divisor_r = divisor_r;
+         cfg.divisor_e = divisor_e;
       }
 
-      cfg.pointer = addr;
-      cfg.stride = stride;
-      cfg.size = size;
+      desc += pan_size(ATTRIBUTE_BUFFER);
+      pan_pack(desc, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
+         cfg.divisor_numerator = divisor_num;
+         cfg.divisor = buf_info->instance_divisor;
+      }
    }
 }
 
@@ -261,8 +301,10 @@ panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info,
 {
    struct mali_attribute_buffer_packed *buf = descs;
 
-   for (unsigned i = 0; i < info->buf_count; i++)
-      panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf++);
+   for (unsigned i = 0; i < info->buf_count; i++) {
+      panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf);
+      buf += 2;
+   }
 }
 
 void
@@ -295,23 +337,31 @@ panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo,
 
 static void
 panvk_emit_attrib(const struct panvk_device *dev,
+                  const struct panvk_draw_info *draw,
                   const struct panvk_attribs_info *attribs,
                   const struct panvk_attrib_buf *bufs,
                   unsigned buf_count,
                   unsigned idx, void *attrib)
 {
    const struct panfrost_device *pdev = &dev->physical_device->pdev;
+   unsigned buf_idx = attribs->attrib[idx].buf;
+   const struct panvk_attrib_buf_info *buf_info = &attribs->buf[buf_idx];
 
    pan_pack(attrib, ATTRIBUTE, cfg) {
-      cfg.buffer_index = attribs->attrib[idx].buf;
+      cfg.buffer_index = buf_idx * 2;
       cfg.offset = attribs->attrib[idx].offset +
                    (bufs[cfg.buffer_index].address & 63);
+
+      if (buf_info->per_instance)
+         cfg.offset += draw->first_instance * buf_info->stride;
+
       cfg.format = pdev->formats[attribs->attrib[idx].format].hw;
    }
 }
 
 void
 panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
+                             const struct panvk_draw_info *draw,
                              const struct panvk_attribs_info *attribs,
                              const struct panvk_attrib_buf *bufs,
                              unsigned buf_count,
@@ -320,7 +370,7 @@ panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
    struct mali_attribute_packed *attrib = descs;
 
    for (unsigned i = 0; i < attribs->attrib_count; i++)
-      panvk_emit_attrib(dev, attribs, bufs, buf_count, i, attrib++);
+      panvk_emit_attrib(dev, draw, attribs, bufs, buf_count, i, attrib++);
 }
 
 void
index f551410..14b3449 100644 (file)
@@ -57,6 +57,7 @@ panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info,
 
 void
 panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
+                             const struct panvk_draw_info *draw,
                              const struct panvk_attribs_info *attribs,
                              const struct panvk_attrib_buf *bufs,
                              unsigned buf_count,
index 3c8cd0f..70daa5d 100644 (file)
@@ -877,6 +877,9 @@ panvk_pipeline_builder_parse_vertex_input(struct panvk_pipeline_builder *builder
          &info->pVertexBindingDescriptions[i];
       attribs->buf_count = MAX2(desc->binding + 1, attribs->buf_count);
       attribs->buf[desc->binding].stride = desc->stride;
+      attribs->buf[desc->binding].per_instance =
+         desc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE;
+      attribs->buf[desc->binding].instance_divisor = 1;
       attribs->buf[desc->binding].special = false;
    }