radv: Refactor predication for compute queues.
author Timur Kristóf <timur.kristof@gmail.com>
Thu, 16 Jun 2022 14:40:45 +0000 (16:40 +0200)
committer Marge Bot <emma+marge@anholt.net>
Wed, 20 Jul 2022 19:00:30 +0000 (19:00 +0000)
Initialize the inverted predication VA only when it is used
for the first time.

This is needed to make conditional rendering work correctly with
task shaders because the internal compute cmdbuf may not exist
yet when conditional rendering starts.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16531>

src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_private.h

index e17c7b2..be9a929 100644 (file)
@@ -533,6 +533,16 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
       cmd_buffer->descriptors[i].push_dirty = false;
    }
 
+   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
+      uint32_t pred_value = 0;
+      uint32_t pred_offset;
+      if (!radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset))
+         cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+
+      cmd_buffer->mec_inv_pred_emitted = false;
+      cmd_buffer->mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
+   }
+
    if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 &&
        cmd_buffer->qf == RADV_QUEUE_GENERAL) {
       unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
@@ -6301,18 +6311,56 @@ radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index)
  * Emulates predication for MEC using COND_EXEC.
  * When the current command buffer is predicating, emit a COND_EXEC packet
  * so that the MEC skips the next few dwords worth of packets.
+ *
+ * To make it work with inverted conditional rendering, we allocate
+ * space in the upload BO and emit some packets to invert the condition.
  */
 static void
-radv_cs_emit_compute_predication(struct radv_cmd_buffer *cmd_buffer, unsigned dwords)
+radv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmdbuf *cs,
+                                 uint64_t inv_va, bool *inv_emitted, unsigned dwords)
 {
-   if (!cmd_buffer->state.predicating)
+   if (!state->predicating) /* Not predicating: emit nothing. */
       return;
 
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+   uint64_t va = state->predication_va; /* Default: test the API predication VA directly. */
+
+   if (!state->predication_type) { /* predication_type == 0: the condition has to be inverted. */
+      /* Emit the inversion sequence only the first time it is needed; *inv_emitted tracks this. */
+      if (!*inv_emitted) {
+         *inv_emitted = true; /* Later calls skip this sequence until the caller clears the flag. */
+
+         /* Unconditionally write 1 to the inverted predication VA. */
+         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+                            COPY_DATA_WR_CONFIRM);
+         radeon_emit(cs, 1);
+         radeon_emit(cs, 0);
+         radeon_emit(cs, inv_va);
+         radeon_emit(cs, inv_va >> 32);
+
+         /* If the value at the API predication VA == 0, skip the next packet (keeps the 1). */
+         radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
+         radeon_emit(cs, va);
+         radeon_emit(cs, va >> 32);
+         radeon_emit(cs, 0); /* Cache policy */
+         radeon_emit(cs, 6); /* Skip 6 dwords: the size of the COPY_DATA packet below. */
+
+         /* Overwrite the inverted predication VA with 0 (runs only when the API condition != 0). */
+         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+                            COPY_DATA_WR_CONFIRM);
+         radeon_emit(cs, 0);
+         radeon_emit(cs, 0);
+         radeon_emit(cs, inv_va);
+         radeon_emit(cs, inv_va >> 32);
+      }
+
+      va = inv_va; /* Make the final COND_EXEC below test the inverted value. */
+   }
 
    radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
-   radeon_emit(cs, cmd_buffer->state.predication_va);
-   radeon_emit(cs, cmd_buffer->state.predication_va >> 32);
+   radeon_emit(cs, va);
+   radeon_emit(cs, va >> 32);
    radeon_emit(cs, 0); /* Cache policy */
    radeon_emit(cs, dwords); /* Size of the predicated packet(s) in DWORDs. */
 }
@@ -7649,7 +7697,9 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
       }
 
       if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
-         radv_cs_emit_compute_predication(cmd_buffer, 4 /* DISPATCH_INDIRECT size */);
+         radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
+                                          &cmd_buffer->mec_inv_pred_emitted,
+                                          4 /* DISPATCH_INDIRECT size */);
          radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1));
          radeon_emit(cs, info->va);
          radeon_emit(cs, info->va >> 32);
@@ -7732,7 +7782,9 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
       }
 
       if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
-         radv_cs_emit_compute_predication(cmd_buffer, 5 /* DISPATCH_DIRECT size */);
+         radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
+                                          &cmd_buffer->mec_inv_pred_emitted,
+                                          5 /* DISPATCH_DIRECT size */);
          predicating = false;
       }
 
@@ -9096,54 +9148,8 @@ radv_CmdBeginConditionalRenderingEXT(
       pred_op = PREDICATION_OP_BOOL64;
    }
 
-
-   if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
-      /* MEC does not support real predication, so we have to emulate that
-       * using COND_EXEC packets before each dispatch.
-       */
-
-      if (!draw_visible) {
-         /* COND_EXEC can only skip the next packet(s) when the condition is 0.
-          * When using inverted conditional rendering, we allocate some space in
-          * the upload BO and emit some packets to invert the condition.
-          */
-
-         uint64_t pred_value = 0, pred_va;
-         unsigned pred_offset;
-
-         radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset);
-         pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
-
-         /* Write 1 to the new predication VA. */
-         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
-                         COPY_DATA_WR_CONFIRM);
-         radeon_emit(cs, 1);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, pred_va);
-         radeon_emit(cs, pred_va >> 32);
-
-         /* If the API predication VA == 0, skip next command. */
-         radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
-         radeon_emit(cs, va);
-         radeon_emit(cs, va >> 32);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, 6); /* 1x COPY_DATA size */
-
-         /* Write 0 to the new predication VA (when the API condition != 0) */
-         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
-                         COPY_DATA_WR_CONFIRM);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, 0);
-         radeon_emit(cs, pred_va);
-         radeon_emit(cs, pred_va >> 32);
-
-         va = pred_va;
-         draw_visible = true;
-      }
-   } else {
-      /* Enable predication for this command buffer. */
+   /* MEC doesn't support predication, we emulate it elsewhere. */
+   if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
       si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va);
    }
 
@@ -9152,6 +9158,7 @@ radv_CmdBeginConditionalRenderingEXT(
    cmd_buffer->state.predication_type = draw_visible;
    cmd_buffer->state.predication_op = pred_op;
    cmd_buffer->state.predication_va = va;
+   cmd_buffer->mec_inv_pred_emitted = false;
 }
 
 VKAPI_ATTR void VKAPI_CALL
@@ -9159,11 +9166,8 @@ radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
 {
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 
-   /* Note, MEC doesn't support predication, so we
-    * don't have to emit anything for MEC here.
-    */
+   /* MEC doesn't support predication, no need to emit anything here. */
    if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
-      /* Disable predication for this command buffer. */
       si_emit_set_predication_state(cmd_buffer, false, 0, 0);
    }
 
@@ -9172,6 +9176,7 @@ radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
    cmd_buffer->state.predication_type = -1;
    cmd_buffer->state.predication_op = 0;
    cmd_buffer->state.predication_va = 0;
+   cmd_buffer->mec_inv_pred_emitted = false;
 }
 
 /* VK_EXT_transform_feedback */
index ca1df44..1f36768 100644 (file)
@@ -1634,6 +1634,9 @@ struct radv_cmd_buffer {
    uint32_t gfx9_fence_idx;
    uint64_t gfx9_eop_bug_va;
 
+   uint64_t mec_inv_pred_va;  /* For inverted predication when using MEC. */
+   bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */
+
    /**
     * Whether a query pool has been resetted and we have to flush caches.
     */