cmd_buffer->descriptors[i].push_dirty = false;
}
+ if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
+ uint32_t pred_value = 0;
+ uint32_t pred_offset;
+ if (!radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset))
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ cmd_buffer->mec_inv_pred_emitted = false;
+ cmd_buffer->mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
+ }
+
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 &&
cmd_buffer->qf == RADV_QUEUE_GENERAL) {
unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
* Emulates predication for MEC using COND_EXEC.
* When the current command buffer is predicating, emit a COND_EXEC packet
* so that the MEC skips the next few dwords worth of packets.
+ *
+ * To support inverted conditional rendering (COND_EXEC can only skip
+ * packets when the predicate value is 0), space allocated in the upload
+ * BO at command buffer begin time is used to store an inverted copy of
+ * the API condition, written by packets emitted here on first use.
*/
static void
-radv_cs_emit_compute_predication(struct radv_cmd_buffer *cmd_buffer, unsigned dwords)
+radv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmdbuf *cs,
+ uint64_t inv_va, bool *inv_emitted, unsigned dwords)
{
- if (!cmd_buffer->state.predicating)
+ if (!state->predicating)
return;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = state->predication_va;
+
+ if (!state->predication_type) {
+ /* Invert the condition the first time it is needed. */
+ if (!*inv_emitted) {
+ *inv_emitted = true;
+
+ /* Write 1 to the inverted predication VA. */
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, 1);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, inv_va);
+ radeon_emit(cs, inv_va >> 32);
+
+ /* If the value at the API predication VA is 0, skip the next packet. */
+ radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 6); /* 1x COPY_DATA size */
+
+ /* Write 0 to the inverted predication VA (when the API condition != 0). */
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, inv_va);
+ radeon_emit(cs, inv_va >> 32);
+ }
+
+ va = inv_va;
+ }
radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
- radeon_emit(cs, cmd_buffer->state.predication_va);
- radeon_emit(cs, cmd_buffer->state.predication_va >> 32);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
radeon_emit(cs, 0); /* Cache policy */
radeon_emit(cs, dwords); /* Size of the predicated packet(s) in DWORDs. */
}
}
if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
- radv_cs_emit_compute_predication(cmd_buffer, 4 /* DISPATCH_INDIRECT size */);
+ radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
+ &cmd_buffer->mec_inv_pred_emitted,
+ 4 /* DISPATCH_INDIRECT size */);
radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, info->va);
radeon_emit(cs, info->va >> 32);
}
if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
- radv_cs_emit_compute_predication(cmd_buffer, 5 /* DISPATCH_DIRECT size */);
+ radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
+ &cmd_buffer->mec_inv_pred_emitted,
+ 5 /* DISPATCH_DIRECT size */);
predicating = false;
}
pred_op = PREDICATION_OP_BOOL64;
}
-
- if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
- /* MEC does not support real predication, so we have to emulate that
- * using COND_EXEC packets before each dispatch.
- */
-
- if (!draw_visible) {
- /* COND_EXEC can only skip the next packet(s) when the condition is 0.
- * When using inverted conditional rendering, we allocate some space in
- * the upload BO and emit some packets to invert the condition.
- */
-
- uint64_t pred_value = 0, pred_va;
- unsigned pred_offset;
-
- radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset);
- pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
-
- /* Write 1 to the new predication VA. */
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, 1);
- radeon_emit(cs, 0);
- radeon_emit(cs, pred_va);
- radeon_emit(cs, pred_va >> 32);
-
- /* If the API predication VA == 0, skip next command. */
- radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, 0);
- radeon_emit(cs, 6); /* 1x COPY_DATA size */
-
- /* Write 0 to the new predication VA (when the API condition != 0) */
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, pred_va);
- radeon_emit(cs, pred_va >> 32);
-
- va = pred_va;
- draw_visible = true;
- }
- } else {
- /* Enable predication for this command buffer. */
+ /* MEC doesn't support predication; it is emulated with COND_EXEC packets emitted before each dispatch. */
+ if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va);
}
cmd_buffer->state.predication_type = draw_visible;
cmd_buffer->state.predication_op = pred_op;
cmd_buffer->state.predication_va = va;
+ cmd_buffer->mec_inv_pred_emitted = false;
}
VKAPI_ATTR void VKAPI_CALL
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- /* Note, MEC doesn't support predication, so we
- * don't have to emit anything for MEC here.
- */
+ /* MEC doesn't support predication; there is no predication state to disable here. */
if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
- /* Disable predication for this command buffer. */
si_emit_set_predication_state(cmd_buffer, false, 0, 0);
}
cmd_buffer->state.predication_type = -1;
cmd_buffer->state.predication_op = 0;
cmd_buffer->state.predication_va = 0;
+ cmd_buffer->mec_inv_pred_emitted = false;
}
/* VK_EXT_transform_feedback */