bool fp64_workaround_enabled;
float lower_depth_range_rate;
unsigned generated_indirect_threshold;
+ unsigned query_clear_with_blorp_threshold;
/* HW workarounds */
bool no_16bit;
* implement a workaround for Gfx9.
*/
ANV_PIPE_POST_SYNC_BIT = (1 << 25),
+
+ /* This bit does not exist directly in PIPE_CONTROL. It means that render
+ * target operations related to clearing of queries are ongoing.
+ */
+ ANV_PIPE_QUERY_CLEARS_BIT = (1 << 26),
};
#define ANV_PIPE_FLUSH_BITS ( \
#define ANV_PIPE_GPGPU_BITS ( \
(GFX_VERx10 >= 125 ? ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT : 0))
+/* Things we need to flush before accessing query data using the command
+ * streamer.
+ *
+ * Prior to DG2 (GFX_VERx10 < 125), experiments show that the command
+ * streamer is not coherent with the tile cache, so we need to flush it to
+ * make any data visible to CS.
+ *
+ * On every platform we also flush the RT cache, which is where blorp writes,
+ * either for clearing the query buffer or for clearing the destination
+ * buffer in vkCmdCopyQueryPoolResults().
+ */
+#define ANV_PIPE_QUERY_FLUSH_BITS ( \
+ (GFX_VERx10 < 125 ? ANV_PIPE_TILE_CACHE_FLUSH_BIT : 0) | \
+ ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
+
enum intel_ds_stall_flag
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits);
* saying that render target writes are ongoing.
*/
if (bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
- bits &= ~(ANV_PIPE_RENDER_TARGET_BUFFER_WRITES);
+ bits &= ~ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
+
+ /* If all the query flush bits are set together with an end-of-pipe sync
+ * or a CS stall, the conditions for flushing the query clears are met and
+ * we can clear the bit.
+ */
+ if ((bits & ANV_PIPE_QUERY_FLUSH_BITS) == ANV_PIPE_QUERY_FLUSH_BITS &&
+ (bits & (ANV_PIPE_END_OF_PIPE_SYNC_BIT |
+ ANV_PIPE_CS_STALL_BIT))) {
+ bits &= ~ANV_PIPE_QUERY_CLEARS_BIT;
+ }
bits &= ~(ANV_PIPE_FLUSH_BITS | ANV_PIPE_STALL_BITS |
ANV_PIPE_END_OF_PIPE_SYNC_BIT);
return VK_SUCCESS;
}
+ /* Flush pending query clears done with blorp so that secondary query
+ * writes do not race with the clear.
+ */
+ if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_QUERY_CLEARS_BIT) {
+ anv_add_pending_pipe_bits(cmd_buffer,
+ ANV_PIPE_QUERY_FLUSH_BITS |
+ ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT,
+ "query clear flush prior command buffer end");
+ }
+
genX(cmd_buffer_flush_generated_draws)(cmd_buffer);
/* Turn on object level preemption if it is disabled to have it in known
*/
genX(apply_task_urb_workaround)(primary);
+ /* Flush pending query clears done with blorp so that secondary query
+ * writes do not race with the clear.
+ */
+ if (primary->state.pending_pipe_bits & ANV_PIPE_QUERY_CLEARS_BIT) {
+ anv_add_pending_pipe_bits(primary,
+ ANV_PIPE_QUERY_FLUSH_BITS |
+ ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT,
+ "query clear flush prior to secondary buffer");
+ }
+
/* The secondary command buffer doesn't know which textures etc. have been
* flushed prior to their execution. Apply those flushes now.
*/
cmd_buffer->state.compute.pipeline_dirty = true;
#endif
+
+#if GFX_VERx10 < 125
+ /* We apparently cannot flush the tile cache (color/depth) from the GPGPU
+ * pipeline. That means query clears will not be visible to query
+ * copy/write. So we need to flush it before going to GPGPU mode.
+ */
+ if (cmd_buffer->state.current_pipeline == _3D &&
+ (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_QUERY_CLEARS_BIT)) {
+ anv_add_pending_pipe_bits(cmd_buffer,
+ ANV_PIPE_QUERY_FLUSH_BITS |
+ ANV_PIPE_END_OF_PIPE_SYNC_BIT,
+ "query clear flush prior to GPGPU");
+ }
+#endif
+
#if GFX_VER >= 12
/* From Tigerlake PRM, Volume 2a, PIPELINE_SELECT:
*
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
uint64s_per_slot = 1 + 2 /* availability + size (PostbuildInfoSerializationDesc) */;
break;
- break;
#endif
case VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR:
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
+ struct anv_physical_device *pdevice = cmd_buffer->device->physical;
+
+ if (queryCount >= pdevice->instance->query_clear_with_blorp_threshold) {
+ anv_cmd_buffer_fill_area(cmd_buffer,
+ anv_query_address(pool, firstQuery),
+ queryCount * pool->stride,
+ 0);
+
+ anv_add_pending_pipe_bits(cmd_buffer,
+ ANV_PIPE_QUERY_CLEARS_BIT,
+ "vkCmdResetQueryPool of timestamps");
+ return;
+ }
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION:
}
}
+static void
+emit_query_clear_flush(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_query_pool *pool,
+ const char *reason)
+{ /* Flush query clears that are still pending on this command buffer.
+ *
+ * cmd_buffer: command buffer whose pending pipe bits are checked and
+ * updated.
+ * pool: not referenced by this helper.
+ * reason: string handed to anv_add_pending_pipe_bits().
+ */
+ if ((cmd_buffer->state.pending_pipe_bits & ANV_PIPE_QUERY_CLEARS_BIT) == 0)
+ return; /* no query clear pending: nothing to flush */
+ /* Request the query flush bits plus an end-of-pipe sync, then call
+ * genX(cmd_buffer_apply_pipe_flushes) immediately.
+ */
+ anv_add_pending_pipe_bits(cmd_buffer,
+ ANV_PIPE_QUERY_FLUSH_BITS |
+ ANV_PIPE_END_OF_PIPE_SYNC_BIT,
+ reason);
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+}
+
+
void genX(CmdBeginQuery)(
VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
struct anv_address query_addr = anv_query_address(pool, query);
+ emit_query_clear_flush(cmd_buffer, pool, "CmdBeginQuery* flush query clears");
+
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);
+ emit_query_clear_flush(cmd_buffer, pool,
+ "CmdWriteTimestamp flush query clears");
+
struct mi_builder b;
mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
* to ensure proper ordering of the commands from the 3d pipe and the
* command streamer.
*/
- if (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_RENDER_TARGET_BUFFER_WRITES) {
+ const bool need_flushes =
+ (cmd_buffer->state.pending_pipe_bits &
+ (ANV_PIPE_RENDER_TARGET_BUFFER_WRITES |
+ ANV_PIPE_QUERY_CLEARS_BIT));
+
+ if (need_flushes) {
anv_add_pending_pipe_bits(cmd_buffer,
- ANV_PIPE_TILE_CACHE_FLUSH_BIT |
- ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
+ ANV_PIPE_QUERY_FLUSH_BITS | ANV_PIPE_CS_STALL_BIT,
"CopyQueryPoolResults");
}
- if ((cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS) ||
- /* Occlusion & timestamp queries are written using a PIPE_CONTROL and
- * because we're about to copy values from MI commands, we need to
- * stall the command streamer to make sure the PIPE_CONTROL values have
- * landed, otherwise we could see inconsistent values & availability.
- *
- * From the vulkan spec:
- *
- * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
- * previous uses of vkCmdResetQueryPool in the same queue, without
- * any additional synchronization."
- */
- pool->type == VK_QUERY_TYPE_OCCLUSION ||
- pool->type == VK_QUERY_TYPE_TIMESTAMP) {
+ bool need_cs_stall =
+ (cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS) ||
+ /* Occlusion & timestamp queries are written using a PIPE_CONTROL and
+ * because we're about to copy values from MI commands, we need to stall
+ * the command streamer to make sure the PIPE_CONTROL values have
+ * landed, otherwise we could see inconsistent values & availability.
+ *
+ * From the vulkan spec:
+ *
+ * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
+ * previous uses of vkCmdResetQueryPool in the same queue, without
+ * any additional synchronization."
+ */
+ pool->type == VK_QUERY_TYPE_OCCLUSION ||
+ pool->type == VK_QUERY_TYPE_TIMESTAMP;
+
+ if (need_cs_stall) {
anv_add_pending_pipe_bits(cmd_buffer,
ANV_PIPE_CS_STALL_BIT,
- "CopyQueryPoolResults");
- genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+ "CopyQueryPoolResults stall");
}
+ if (need_cs_stall || need_flushes)
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+
struct anv_address dest_addr = anv_address_add(buffer->address, destOffset);
for (uint32_t i = 0; i < queryCount; i++) {
struct anv_address query_addr = anv_query_address(pool, firstQuery + i);