v3dv/cmd_buffer: support for push constants
authorAlejandro Piñeiro <apinheiro@igalia.com>
Fri, 11 Sep 2020 21:26:07 +0000 (23:26 +0200)
committerMarge Bot <eric+marge@anholt.net>
Tue, 13 Oct 2020 21:21:27 +0000 (21:21 +0000)
By default they are trivially lowered to load_uniform.

We still need to allocate an UBO for push constants, used for those
that are accessed using a non-const index. This is automatically
handled by the compiler, as it cames back as asking a
QUNIFORM_UBO_ADDR. This is what already does for gallium.

Note that if needing the UBO, we are uploading the full push constant
data. An improvement would be to try to upload only the data that
needs to rely on the UBO (so non-const accesses to uniforms).

Also, the code is not handling getting out of space from the UBO
bo. This would be tackled at a different commit.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>

src/broadcom/vulkan/v3dv_cmd_buffer.c
src/broadcom/vulkan/v3dv_device.c
src/broadcom/vulkan/v3dv_pipeline.c
src/broadcom/vulkan/v3dv_private.h
src/broadcom/vulkan/v3dv_uniforms.c

index fe9dcc5..56ea8c2 100644 (file)
@@ -127,6 +127,9 @@ cmd_buffer_create(struct v3dv_device *device,
    cmd_buffer->level = level;
    cmd_buffer->usage_flags = 0;
 
+   cmd_buffer->push_constants_descriptor.offset = 0;
+   cmd_buffer->push_constants_descriptor.bo = NULL;
+
    list_inithead(&cmd_buffer->submit_jobs);
 
    cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_NEW;
@@ -189,6 +192,9 @@ cmd_buffer_destroy(struct v3dv_cmd_buffer *cmd_buffer)
       vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
    }
 
+   if (cmd_buffer->push_constants_descriptor.bo)
+      v3dv_bo_free(cmd_buffer->device, cmd_buffer->push_constants_descriptor.bo);
+
    vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
 }
 
@@ -2011,7 +2017,8 @@ cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer)
 
    if (*dirty & (V3DV_CMD_DIRTY_PIPELINE |
                  V3DV_CMD_DIRTY_VERTEX_BUFFER |
-                 V3DV_CMD_DIRTY_DESCRIPTOR_SETS)) {
+                 V3DV_CMD_DIRTY_DESCRIPTOR_SETS |
+                 V3DV_CMD_DIRTY_PUSH_CONSTANTS)) {
       emit_graphics_pipeline(cmd_buffer);
    }
 
@@ -2250,3 +2257,18 @@ v3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
 
    cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DESCRIPTOR_SETS;
 }
+
+void
+v3dv_CmdPushConstants(VkCommandBuffer commandBuffer,
+                      VkPipelineLayout layout,
+                      VkShaderStageFlags stageFlags,
+                      uint32_t offset,
+                      uint32_t size,
+                      const void *pValues)
+{
+   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   memcpy((void*) cmd_buffer->push_constants_data + offset, pValues, size);
+
+   cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PUSH_CONSTANTS;
+}
index 6e0a58d..ccf2625 100644 (file)
@@ -644,7 +644,7 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
       .maxTexelBufferElements                   = (1ul << 28),
       .maxUniformBufferRange                    = (1ul << 27) - 1,
       .maxStorageBufferRange                    = (1ul << 27) - 1,
-      .maxPushConstantsSize                     = (1ul << 27) - 1,
+      .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
       .maxMemoryAllocationCount                 = mem_size / page_size,
       .maxSamplerAllocationCount                = 64 * 1024,
       .bufferImageGranularity                   = 256, /* A cache line */
index 95e7520..c0df726 100644 (file)
@@ -294,16 +294,30 @@ descriptor_map_add(struct v3dv_descriptor_map *map,
    return index;
 }
 
+
+static void
+lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
+                         struct v3dv_pipeline *pipeline)
+{
+   assert(instr->intrinsic == nir_intrinsic_load_push_constant);
+
+   /* FIXME: next assert it not something that should happen in general, just
+    * to catch any test example under that case and deal with it
+    */
+   assert(nir_intrinsic_base(instr) == 0);
+
+   instr->intrinsic = nir_intrinsic_load_uniform;
+}
+
 /* Gathers info from the intrinsic (set and binding) and then lowers it so it
  * could be used by the v3d_compiler */
-static bool
+static void
 lower_vulkan_resource_index(nir_builder *b,
                             nir_intrinsic_instr *instr,
                             struct v3dv_pipeline *pipeline,
                             const struct v3dv_pipeline_layout *layout)
 {
-   if (instr->intrinsic != nir_intrinsic_vulkan_resource_index)
-      return false;
+   assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);
 
    nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);
 
@@ -324,13 +338,14 @@ lower_vulkan_resource_index(nir_builder *b,
       if (!const_val)
          unreachable("non-constant vulkan_resource_index array index");
 
-      /* Note: although for ubos we should skip index 0 which is used for push
-       * constants, that is already took into account when loading the ubo at
-       * nir_to_vir, so we don't need to do it here again.
-       */
       index = descriptor_map_add(descriptor_map, set, binding,
                                  const_val->u32,
                                  binding_layout->array_size);
+
+      if (nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+         /* skip index 0 which is used for push constants */
+         index++;
+      }
       break;
    }
 
@@ -342,8 +357,27 @@ lower_vulkan_resource_index(nir_builder *b,
    nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                             nir_src_for_ssa(nir_imm_int(b, index)));
    nir_instr_remove(&instr->instr);
+}
+
+static bool
+lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
+                struct v3dv_pipeline *pipeline,
+                const struct v3dv_pipeline_layout *layout)
+{
+   switch (instr->intrinsic) {
 
-   return true;
+   case nir_intrinsic_load_push_constant:
+      lower_load_push_constant(b, instr, pipeline);
+      pipeline->use_push_constants = true;
+      return true;
+
+   case nir_intrinsic_vulkan_resource_index:
+      lower_vulkan_resource_index(b, instr, pipeline, layout);
+      return true;
+
+   default:
+      return false;
+   }
 }
 
 static bool
@@ -361,8 +395,7 @@ lower_impl(nir_function_impl *impl,
          switch (instr->type) {
          case nir_instr_type_intrinsic:
             progress |=
-               lower_vulkan_resource_index(&b, nir_instr_as_intrinsic(instr),
-                                           pipeline, layout);
+               lower_intrinsic(&b, nir_instr_as_intrinsic(instr), pipeline, layout);
             break;
          default:
             break;
index bc60631..83bb55a 100644 (file)
@@ -121,6 +121,8 @@ pack_emit_reloc(void *cl, const void *reloc) {}
 
 #define MAX_SETS 16
 
+#define MAX_PUSH_CONSTANTS_SIZE 128
+
 struct v3dv_instance;
 
 #ifdef USE_V3D_SIMULATOR
@@ -463,6 +465,7 @@ enum v3dv_cmd_dirty_bits {
    V3DV_CMD_DIRTY_PIPELINE                  = 1 << 5,
    V3DV_CMD_DIRTY_VERTEX_BUFFER             = 1 << 6,
    V3DV_CMD_DIRTY_DESCRIPTOR_SETS           = 1 << 7,
+   V3DV_CMD_DIRTY_PUSH_CONSTANTS            = 1 << 8,
 };
 
 
@@ -584,6 +587,11 @@ struct v3dv_cmd_buffer_state {
    uint8_t index_size;
 };
 
+struct v3dv_descriptor {
+   struct v3dv_bo *bo;
+   uint32_t offset;
+};
+
 struct v3dv_cmd_buffer {
    VK_LOADER_DATA _loader_data;
 
@@ -599,6 +607,9 @@ struct v3dv_cmd_buffer {
 
    struct v3dv_cmd_buffer_state state;
 
+   uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
+   struct v3dv_descriptor push_constants_descriptor;
+
    /* List of jobs to submit to the kernel */
    struct list_head submit_jobs;
 };
@@ -727,11 +738,6 @@ struct v3dv_descriptor_pool {
    struct v3dv_descriptor_pool_entry entries[0];
 };
 
-struct v3dv_descriptor {
-   struct v3dv_bo *bo;
-   uint32_t offset;
-};
-
 struct v3dv_descriptor_set {
    struct v3dv_descriptor_pool *pool;
 
@@ -844,6 +850,9 @@ struct v3dv_pipeline {
    /* If the pipeline should emit any of the stencil configuration packets */
    bool emit_stencil_cfg[2];
 
+   /* If the pipeline is using push constants */
+   bool use_push_constants;
+
    /* Packets prepacked during pipeline creation
     */
    uint8_t cfg_bits[cl_packet_length(CFG_BITS)];
index 0d84ff8..20e3188 100644 (file)
@@ -53,6 +53,58 @@ get_descriptor(struct v3dv_descriptor_state *descriptor_state,
    return &set->descriptors[binding_layout->descriptor_index + array_index];
 }
 
+/*
+ * This method checks if the ubo used for push constants is needed to be
+ * updated or not.
+ *
+ * push contants ubo is only used for push constants accessed by a non-const
+ * index.
+ *
+ * FIXME: right now for this cases we are uploading the full
+ * push_constants_data. An improvement would be to upload only the data that
+ * we need to rely on a UBO.
+ */
+static void
+check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer)
+{
+   if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS))
+      return;
+
+   if (cmd_buffer->push_constants_descriptor.bo == NULL) {
+      cmd_buffer->push_constants_descriptor.bo =
+         v3dv_bo_alloc(cmd_buffer->device, MAX_PUSH_CONSTANTS_SIZE, "push constants");
+
+      if (!cmd_buffer->push_constants_descriptor.bo) {
+         fprintf(stderr, "Failed to allocate memory for push constants\n");
+         abort();
+      }
+
+      bool ok = v3dv_bo_map(cmd_buffer->device,
+                            cmd_buffer->push_constants_descriptor.bo,
+                            MAX_PUSH_CONSTANTS_SIZE);
+      if (!ok) {
+         fprintf(stderr, "failed to map push constants buffer\n");
+         abort();
+      }
+   } else {
+      if (cmd_buffer->push_constants_descriptor.offset + MAX_PUSH_CONSTANTS_SIZE <=
+          cmd_buffer->push_constants_descriptor.bo->size) {
+         cmd_buffer->push_constants_descriptor.offset += MAX_PUSH_CONSTANTS_SIZE;
+      } else {
+         /* FIXME: we got out of space for push descriptors. Should we create
+          * a new bo? This could be easier with a uploader
+          */
+      }
+   }
+
+   memcpy(cmd_buffer->push_constants_descriptor.bo->map +
+          cmd_buffer->push_constants_descriptor.offset,
+          cmd_buffer->push_constants_data,
+          MAX_PUSH_CONSTANTS_SIZE);
+
+   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PUSH_CONSTANTS;
+}
+
 struct v3dv_cl_reloc
 v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                     struct v3dv_pipeline_stage *p_stage)
@@ -88,6 +140,11 @@ v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
          cl_aligned_u32(&uniforms, data);
          break;
 
+      case QUNIFORM_UNIFORM:
+         assert(pipeline->use_push_constants);
+         cl_aligned_u32(&uniforms, cmd_buffer->push_constants_data[data]);
+         break;
+
       case QUNIFORM_VIEWPORT_X_SCALE:
          cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
          break;
@@ -115,18 +172,28 @@ v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
             v3d_unit_data_get_offset(data) :
             0; /* FIXME */
 
-         /* UBO index is shift up by 1, to follow gallium (0 is gallium's
-          * constant buffer 0), that is what nir_to_vir expects. But for the
-          * ubo_map below, we start from 0.
+         /* For ubos, index is shifted, as 0 is reserved for push constants.
           */
-         uint32_t index =
-            uinfo->contents[i] == QUNIFORM_UBO_ADDR ?
-            v3d_unit_data_get_unit(data) - 1 :
-            data;
-
-         struct v3dv_descriptor *descriptor =
-            get_descriptor(descriptor_state, map, index);
-         assert(descriptor);
+         struct v3dv_descriptor *descriptor = NULL;
+         if (uinfo->contents[i] == QUNIFORM_UBO_ADDR &&
+             v3d_unit_data_get_unit(data) == 0) {
+            /* This calls is to ensure that the push_constant_ubo is
+             * updated. It already take into account it is should do the
+             * update or not
+             */
+            check_push_constants_ubo(cmd_buffer);
+
+            descriptor = &cmd_buffer->push_constants_descriptor;
+         } else {
+            uint32_t index =
+               uinfo->contents[i] == QUNIFORM_UBO_ADDR ?
+               v3d_unit_data_get_unit(data) - 1 :
+               data;
+
+            descriptor =
+               get_descriptor(descriptor_state, map, index);
+            assert(descriptor);
+         }
 
          cl_aligned_reloc(&job->indirect, &uniforms,
                           descriptor->bo,