radv: Refactor predication for compute queues.

author Timur Kristóf <timur.kristof@gmail.com>

Thu, 16 Jun 2022 14:40:45 +0000 (16:40 +0200)

committer Marge Bot <emma+marge@anholt.net>

Wed, 20 Jul 2022 19:00:30 +0000 (19:00 +0000)
author Timur Kristóf <timur.kristof@gmail.com>
Thu, 16 Jun 2022 14:40:45 +0000 (16:40 +0200)
committer Marge Bot <emma+marge@anholt.net>
Wed, 20 Jul 2022 19:00:30 +0000 (19:00 +0000)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c

index e17c7b2..be9a929 100644 (file)
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -533,6 +533,16 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
        cmd_buffer->descriptors[i].push_dirty = false;
     }
  
+   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
+      uint32_t pred_value = 0;
+      uint32_t pred_offset;
+      if (!radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset))
+         cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+
+      cmd_buffer->mec_inv_pred_emitted = false;
+      cmd_buffer->mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
+   }
+
     if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 &&
         cmd_buffer->qf == RADV_QUEUE_GENERAL) {
        unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
@@ -6301,18 +6311,56 @@ radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index)
   * Emulates predication for MEC using COND_EXEC.
   * When the current command buffer is predicating, emit a COND_EXEC packet
   * so that the MEC skips the next few dwords worth of packets.
+ *
+ * To make it work with inverted conditional rendering, we allocate
+ * space in the upload BO and emit some packets to invert the condition.
   */
  static void
-radv_cs_emit_compute_predication(struct radv_cmd_buffer *cmd_buffer, unsigned dwords)
+radv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmdbuf *cs,
+                                 uint64_t inv_va, bool *inv_emitted, unsigned dwords)
  {
-   if (!cmd_buffer->state.predicating)
+   if (!state->predicating)
        return;
  
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   uint64_t va = state->predication_va;
+
+   if (!state->predication_type) { /* Inverted conditional rendering was requested. */
+      /* Invert the condition the first time it is needed; *inv_emitted records that it was done. */
+      if (!*inv_emitted) {
+         *inv_emitted = true;
+
+         /* Write 1 to the inverted predication VA (kept when the API value is 0). */
+         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+                            COPY_DATA_WR_CONFIRM);
+         radeon_emit(cs, 1);
+         radeon_emit(cs, 0);
+         radeon_emit(cs, inv_va);
+         radeon_emit(cs, inv_va >> 32);
+
+         /* If the value at the API predication VA == 0, skip the next command. */
+         radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
+         radeon_emit(cs, va);
+         radeon_emit(cs, va >> 32);
+         radeon_emit(cs, 0); /* Cache policy */
+         radeon_emit(cs, 6); /* Dwords to skip: 1x COPY_DATA size */
+
+         /* Write 0 to the inverted predication VA (when the API condition != 0) */
+         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+                            COPY_DATA_WR_CONFIRM);
+         radeon_emit(cs, 0);
+         radeon_emit(cs, 0);
+         radeon_emit(cs, inv_va);
+         radeon_emit(cs, inv_va >> 32);
+      }
+
+      va = inv_va; /* Predicate the caller's packets on the inverted value instead. */
+   }
  
     radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
-   radeon_emit(cs, cmd_buffer->state.predication_va);
-   radeon_emit(cs, cmd_buffer->state.predication_va >> 32);
+   radeon_emit(cs, va);
+   radeon_emit(cs, va >> 32);
     radeon_emit(cs, 0); /* Cache policy */
     radeon_emit(cs, dwords); /* Size of the predicated packet(s) in DWORDs. */
  }
@@ -7649,7 +7697,9 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
        }
  
        if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
-         radv_cs_emit_compute_predication(cmd_buffer, 4 /* DISPATCH_INDIRECT size */);
+         radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
+                                          &cmd_buffer->mec_inv_pred_emitted,
+                                          4 /* DISPATCH_INDIRECT size */);
           radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1));
           radeon_emit(cs, info->va);
           radeon_emit(cs, info->va >> 32);
@@ -7732,7 +7782,9 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
        }
  
        if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
-         radv_cs_emit_compute_predication(cmd_buffer, 5 /* DISPATCH_DIRECT size */);
+         radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
+                                          &cmd_buffer->mec_inv_pred_emitted,
+                                          5 /* DISPATCH_DIRECT size */);
           predicating = false;
        }
  
@@ -9096,54 +9148,8 @@ radv_CmdBeginConditionalRenderingEXT(
        pred_op = PREDICATION_OP_BOOL64;
     }
  
-
-   if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
-      /* MEC does not support real predication, so we have to emulate that
-       * using COND_EXEC packets before each dispatch.
-       */
-
-      if (!draw_visible) {
-         /* COND_EXEC can only skip the next packet(s) when the condition is 0.
-          * When using inverted conditional rendering, we allocate some space in
-          * the upload BO and emit some packets to invert the condition.
-          */
-
-         uint64_t pred_value = 0, pred_va;
-         unsigned pred_offset;
-
-         radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset);
-         pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
-
-         /* Write 1 to the new predication VA. */
-         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
-                         COPY_DATA_WR_CONFIRM);
-         radeon_emit(cs, 1);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, pred_va);
-         radeon_emit(cs, pred_va >> 32);
-
-         /* If the API predication VA == 0, skip next command. */
-         radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
-         radeon_emit(cs, va);
-         radeon_emit(cs, va >> 32);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, 6); /* 1x COPY_DATA size */
-
-         /* Write 0 to the new predication VA (when the API condition != 0) */
-         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
-                         COPY_DATA_WR_CONFIRM);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, pred_va);
-         radeon_emit(cs, pred_va >> 32);
-
-         va = pred_va;
-         draw_visible = true;
-      }
-   } else {
-      /* Enable predication for this command buffer. */
+   /* MEC doesn't support predication, we emulate it elsewhere. */
+   if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
        si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va);
     }
  
@@ -9152,6 +9158,7 @@ radv_CmdBeginConditionalRenderingEXT(
     cmd_buffer->state.predication_type = draw_visible;
     cmd_buffer->state.predication_op = pred_op;
     cmd_buffer->state.predication_va = va;
+   cmd_buffer->mec_inv_pred_emitted = false;
  }
  
  VKAPI_ATTR void VKAPI_CALL
@@ -9159,11 +9166,8 @@ radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
  {
     RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
  
-   /* Note, MEC doesn't support predication, so we
-    * don't have to emit anything for MEC here.
-    */
+   /* MEC doesn't support predication, no need to emit anything here. */
     if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
-      /* Disable predication for this command buffer. */
        si_emit_set_predication_state(cmd_buffer, false, 0, 0);
     }
  
@@ -9172,6 +9176,7 @@ radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
     cmd_buffer->state.predication_type = -1;
     cmd_buffer->state.predication_op = 0;
     cmd_buffer->state.predication_va = 0;
+   cmd_buffer->mec_inv_pred_emitted = false;
  }
  
  /* VK_EXT_transform_feedback */
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h

index ca1df44..1f36768 100644 (file)
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1634,6 +1634,9 @@ struct radv_cmd_buffer {
     uint32_t gfx9_fence_idx;
     uint64_t gfx9_eop_bug_va;
  
+   uint64_t mec_inv_pred_va;  /* For inverted predication when using MEC. */
+   bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */
+
     /**
      * Whether a query pool has been resetted and we have to flush caches.
      */
author	Timur Kristóf <timur.kristof@gmail.com>
	Thu, 16 Jun 2022 14:40:45 +0000 (16:40 +0200)
committer	Marge Bot <emma+marge@anholt.net>
	Wed, 20 Jul 2022 19:00:30 +0000 (19:00 +0000)
src/amd/vulkan/radv_cmd_buffer.c		patch \| blob \| history
src/amd/vulkan/radv_private.h		patch \| blob \| history