return VK_SUCCESS;
}
-void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
- uint32_t groupCountX,
- uint32_t groupCountY,
- uint32_t groupCountZ)
-{
- assert(!"Unimplemented");
-}
-
-void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset)
-{
- assert(!"Unimplemented");
-}
-
-void pvr_CmdDraw(VkCommandBuffer commandBuffer,
- uint32_t vertexCount,
- uint32_t instanceCount,
- uint32_t firstVertex,
- uint32_t firstInstance)
-{
- assert(!"Unimplemented");
-}
-
static void
-pvr_update_draw_state(struct pvr_cmd_buffer_state *const state,
- const struct pvr_cmd_buffer_draw_state *const draw_state)
-{
- /* We don't have a state to tell us that base_instance is being used so it
- * gets used as a boolean - 0 means we'll use a pds program that skips the
- * base instance addition. If the base_instance gets used (and the last
- * draw's base_instance was 0) then we switch to the BASE_INSTANCE attrib
- * program.
- *
- * If base_instance changes then we only need to update the data section.
- *
- * The only draw call state that doesn't really matter is the start vertex
- * as that is handled properly in the VDM state in all cases.
- */
- if ((state->draw_state.draw_indexed != draw_state->draw_indexed) ||
- (state->draw_state.draw_indirect != draw_state->draw_indirect) ||
- (state->draw_state.base_instance == 0 &&
- draw_state->base_instance != 0)) {
- state->dirty.draw_variant = true;
- } else if (state->draw_state.base_instance != draw_state->base_instance) {
- state->dirty.draw_base_instance = true;
- }
-
- state->draw_state = *draw_state;
-}
-
-static uint32_t pvr_calc_shared_regs_count(
- const struct pvr_graphics_pipeline *const gfx_pipeline)
+pvr_validate_push_descriptors(struct pvr_cmd_buffer *cmd_buffer,
+ bool *const push_descriptors_dirty_out)
{
- const struct pvr_pipeline_stage_state *const vertex_state =
- &gfx_pipeline->vertex_shader_state.stage_state;
- uint32_t shared_regs = vertex_state->const_shared_reg_count +
- vertex_state->const_shared_reg_offset;
-
- if (gfx_pipeline->fragment_shader_state.bo) {
- const struct pvr_pipeline_stage_state *const fragment_state =
- &gfx_pipeline->fragment_shader_state.stage_state;
- uint32_t fragment_regs = fragment_state->const_shared_reg_count +
- fragment_state->const_shared_reg_offset;
-
- shared_regs = MAX2(shared_regs, fragment_regs);
- }
-
- return shared_regs;
+ /* TODO: Implement this function, based on ValidatePushDescriptors. */
+ pvr_finishme("Add support for push descriptors!");
+ *push_descriptors_dirty_out = false;
}
#define PVR_WRITE(_buffer, _value, _offset, _max) \
struct pvr_cmd_buffer *const cmd_buffer,
enum pvr_stage_allocation stage,
const struct pvr_stage_allocation_uniform_state *uniform_state,
+ UNUSED const pvr_dev_addr_t *const num_workgroups_buff_addr,
uint32_t *const uniform_data_offset_out)
{
const struct pvr_pds_info *const pds_info = &uniform_state->pds_info;
- const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
const struct pvr_descriptor_state *desc_state;
const uint8_t *entries;
uint32_t *dword_buffer;
struct pvr_bo *pvr_bo;
VkResult result;
+ pvr_finishme("Handle num_worgroups_buff_addr");
+
if (!pds_info->data_size_in_dwords)
return VK_SUCCESS;
const struct pvr_const_map_entry *const entry_header =
(struct pvr_const_map_entry *)entries;
+ /* TODO: See if instead of reusing the blend constant buffer type entry,
+ * we can set up a new buffer type specifically for num_workgroups or
+ * other built-in variables. The mappings are set up at pipeline
+ * creation when creating the uniform program.
+ */
+ pvr_finishme("Handle blend constant reuse for compute.");
+
switch (entry_header->type) {
case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
const struct pvr_const_map_entry_literal32 *const literal =
/* TODO: Handle push descriptors. */
assert(desc_set < PVR_MAX_DESCRIPTOR_SETS);
- descriptor_set = state->gfx_desc_state.descriptor_sets[desc_set];
+ descriptor_set = desc_state->descriptor_sets[desc_set];
/* TODO: Handle dynamic buffers. */
descriptor = &descriptor_set->descriptors[binding];
#undef PVR_WRITE
+void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
+ uint32_t groupCountX,
+ uint32_t groupCountY,
+ uint32_t groupCountZ)
+{
+ const uint32_t workgroup_size[] = { groupCountX, groupCountY, groupCountZ };
+ PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+ struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+ const struct pvr_compute_pipeline *compute_pipeline =
+ state->compute_pipeline;
+ const VkShaderStageFlags push_consts_stage_mask =
+ compute_pipeline->base.layout->push_constants_shader_stages;
+ bool push_descriptors_dirty;
+ struct pvr_sub_cmd *sub_cmd;
+ VkResult result;
+
+ PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+ assert(compute_pipeline);
+
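+ /* Dispatching zero workgroups in any dimension is a no-op. */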
+ if (!groupCountX || !groupCountY || !groupCountZ)
+ return;
+
+ result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_COMPUTE);
+ if (result != VK_SUCCESS)
+ return;
+
+ sub_cmd = state->current_sub_cmd;
+
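+ /* Record whether any dispatch in this sub command uses atomics or
+ * barriers.
+ */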
+ sub_cmd->compute.uses_atomic_ops |=
+ compute_pipeline->state.shader.uses_atomic_ops;
+ sub_cmd->compute.uses_barrier |= compute_pipeline->state.shader.uses_barrier;
+
+ if (push_consts_stage_mask & VK_SHADER_STAGE_COMPUTE_BIT) {
+ /* TODO: Add a dirty push constants mask in the cmd_buffer state and
+ * check for dirty compute stage.
+ */
+ pvr_finishme("Add support for push constants.");
+ }
+
+ pvr_validate_push_descriptors(cmd_buffer, &push_descriptors_dirty);
+
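+ /* If the shader reads the number of workgroups, upload the dispatch
+ * dimensions so the uniform program can source them from memory.
+ */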
+ if (compute_pipeline->state.shader.uses_num_workgroups) {
+ struct pvr_bo *num_workgroups_bo;
+
+ result = pvr_cmd_buffer_upload_general(cmd_buffer,
+ workgroup_size,
+ sizeof(workgroup_size),
+ &num_workgroups_bo);
+ if (result != VK_SUCCESS)
+ return;
+
+ result =
+ pvr_setup_descriptor_mappings(cmd_buffer,
+ PVR_STAGE_ALLOCATION_COMPUTE,
+ &compute_pipeline->state.uniform,
+ &num_workgroups_bo->vma->dev_addr,
+ &state->pds_compute_uniform_data_offset);
+ if (result != VK_SUCCESS)
+ return;
+ } else if ((compute_pipeline->base.layout
+ ->per_stage_descriptor_masks[PVR_STAGE_ALLOCATION_COMPUTE] &&
+ state->dirty.compute_desc_dirty) ||
+ state->dirty.compute_pipeline_binding || push_descriptors_dirty) {
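+ /* Only re-emit the descriptor mappings when the compute descriptors,
+ * the bound pipeline, or the push descriptors have changed.
+ */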
+ result =
+ pvr_setup_descriptor_mappings(cmd_buffer,
+ PVR_STAGE_ALLOCATION_COMPUTE,
+ &compute_pipeline->state.uniform,
+ NULL,
+ &state->pds_compute_uniform_data_offset);
+ if (result != VK_SUCCESS)
+ return;
+ }
+
+ /* FIXME: Create shared update kernel and emit control stream. */
+ /* FIXME: Create update kernel and emit control stream. */
+}
+
+void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
+ VkBuffer _buffer,
+ VkDeviceSize offset)
+{
+ assert(!"Unimplemented");
+}
+
+void pvr_CmdDraw(VkCommandBuffer commandBuffer,
+ uint32_t vertexCount,
+ uint32_t instanceCount,
+ uint32_t firstVertex,
+ uint32_t firstInstance)
+{
+ assert(!"Unimplemented");
+}
+
+static void
+pvr_update_draw_state(struct pvr_cmd_buffer_state *const state,
+ const struct pvr_cmd_buffer_draw_state *const draw_state)
+{
+ /* We don't have a state to tell us that base_instance is being used so it
+ * gets used as a boolean - 0 means we'll use a pds program that skips the
+ * base instance addition. If the base_instance gets used (and the last
+ * draw's base_instance was 0) then we switch to the BASE_INSTANCE attrib
+ * program.
+ *
+ * If base_instance changes then we only need to update the data section.
+ *
+ * The only draw call state that doesn't really matter is the start vertex
+ * as that is handled properly in the VDM state in all cases.
+ */
+ if ((state->draw_state.draw_indexed != draw_state->draw_indexed) ||
+ (state->draw_state.draw_indirect != draw_state->draw_indirect) ||
+ (state->draw_state.base_instance == 0 &&
+ draw_state->base_instance != 0)) {
+ state->dirty.draw_variant = true;
+ } else if (state->draw_state.base_instance != draw_state->base_instance) {
+ state->dirty.draw_base_instance = true;
+ }
+
+ state->draw_state = *draw_state;
+}
+
+static uint32_t pvr_calc_shared_regs_count(
+ const struct pvr_graphics_pipeline *const gfx_pipeline)
+{
+ const struct pvr_pipeline_stage_state *const vertex_state =
+ &gfx_pipeline->vertex_shader_state.stage_state;
+ uint32_t shared_regs = vertex_state->const_shared_reg_count +
+ vertex_state->const_shared_reg_offset;
+
+ if (gfx_pipeline->fragment_shader_state.bo) {
+ const struct pvr_pipeline_stage_state *const fragment_state =
+ &gfx_pipeline->fragment_shader_state.stage_state;
+ uint32_t fragment_regs = fragment_state->const_shared_reg_count +
+ fragment_state->const_shared_reg_offset;
+
+ shared_regs = MAX2(shared_regs, fragment_regs);
+ }
+
+ return shared_regs;
+}
+
static void
pvr_emit_dirty_pds_state(const struct pvr_cmd_buffer *const cmd_buffer,
const uint32_t pds_vertex_uniform_data_offset)
}
static void
-pvr_validate_push_descriptors(struct pvr_cmd_buffer *cmd_buffer,
- bool *const push_descriptors_dirty_out)
-{
- /* TODO: Implement this function, based on ValidatePushDescriptors. */
- pvr_finishme("Add support for push descriptors!");
- *push_descriptors_dirty_out = false;
-}
-
-static void
pvr_calculate_vertex_cam_size(const struct pvr_device_info *dev_info,
const uint32_t vs_output_size,
const bool raster_enable,
cmd_buffer,
PVR_STAGE_ALLOCATION_FRAGMENT,
&state->gfx_pipeline->fragment_shader_state.uniform_state,
+ NULL,
&state->pds_fragment_uniform_data_offset);
if (result != VK_SUCCESS) {
mesa_loge("Could not setup fragment descriptor mappings.");
cmd_buffer,
PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
&state->gfx_pipeline->vertex_shader_state.uniform_state,
+ NULL,
&pds_vertex_uniform_data_offset);
if (result != VK_SUCCESS) {
mesa_loge("Could not setup vertex descriptor mappings.");