static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0, 8, 9, 10};
static unsigned
+radv_get_pipelinestat_query_offset(VkQueryPipelineStatisticFlagBits query)
+{
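+ /* Map a single pipeline statistic flag to the byte offset of its 64-bit counter within one statistics block. */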
+ uint32_t idx = ffs(query) - 1;
+ return pipeline_statistics_indices[idx] * 8;
+}
+
+static unsigned
radv_get_pipelinestat_query_size(struct radv_device *device)
{
unsigned num_results = device->physical_device->rad_info.gfx_level >= GFX11 ? 14 : 11;
nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
nir_ssa_def *stats_mask = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
nir_ssa_def *avail_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
- nir_ssa_def *uses_gds = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);
nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
nir_ssa_def *global_id = get_global_ids(&b, 1);
- nir_variable *input_stride = nir_local_variable_create(b.impl, glsl_int_type(), "input_stride");
- nir_push_if(&b, nir_ine_imm(&b, uses_gds, 0));
- {
- nir_store_var(&b, input_stride, nir_imm_int(&b, pipelinestat_block_size * 2 + 8 * 2), 0x1);
- }
- nir_push_else(&b, NULL);
- {
- nir_store_var(&b, input_stride, nir_imm_int(&b, pipelinestat_block_size * 2), 0x1);
- }
- nir_pop_if(&b, NULL);
-
- nir_ssa_def *input_base = nir_imul(&b, nir_load_var(&b, input_stride), global_id);
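+ /* Each query slot is now exactly two pipeline statistics blocks (begin/end); the extra GDS words are gone. */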
+ nir_ssa_def *input_stride = nir_imm_int(&b, pipelinestat_block_size * 2);
+ nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
nir_store_var(&b, result, nir_isub(&b, end, start), 0x1);
- nir_push_if(&b,
- nir_iand(&b, nir_i2b(&b, uses_gds),
- nir_imm_bool(&b, 1u << i == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)));
- {
- /* Compute the GDS result if needed. */
- nir_ssa_def *gds_start_offset = nir_iadd(&b, input_base, nir_imm_int(&b, pipelinestat_block_size * 2));
- nir_ssa_def *gds_start = nir_load_ssbo(&b, 1, 64, src_buf, gds_start_offset);
-
- nir_ssa_def *gds_end_offset = nir_iadd(&b, input_base, nir_imm_int(&b, pipelinestat_block_size * 2 + 8));
- nir_ssa_def *gds_end = nir_load_ssbo(&b, 1, 64, src_buf, gds_end_offset);
-
- nir_ssa_def *ngg_gds_result = nir_isub(&b, gds_end, gds_start);
-
- nir_store_var(&b, result, nir_iadd(&b, nir_load_var(&b, result), ngg_gds_result), 0x1);
- }
- nir_pop_if(&b, NULL);
-
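+ /* No GDS fixup is needed anymore: the GS primitives slots already hold the NGG value copied at begin/end. */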
/* Store result */
nir_push_if(&b, result_is_64bit);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
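+ /* The NGG GS primitives value is copied into its regular slot, so no extra per-query space is required. */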
pool->stride = radv_get_pipelinestat_query_size(device) * 2;
- if (pool->uses_gds) {
- /* When the query pool needs GDS (for counting the number of primitives generated by a
- * geometry shader with NGG), allocate 2x64-bit values for begin/end.
- */
- pool->stride += 8 * 2;
- }
break;
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device);
const uint32_t *avail_ptr = (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query);
- uint64_t ngg_gds_result = 0;
do {
available = p_atomic_read(avail_ptr);
if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
result = VK_NOT_READY;
- if (pool->uses_gds) {
- /* Compute the result that was copied from GDS. */
- const uint64_t *gds_start = (uint64_t *)(src + pipelinestat_block_size * 2);
- const uint64_t *gds_stop = (uint64_t *)(src + pipelinestat_block_size * 2 + 8);
-
- ngg_gds_result = gds_stop[0] - gds_start[0];
- }
-
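+ /* The GS primitives counters already hold the GDS-based NGG value, so stop - start needs no extra fixup. */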
const uint64_t *start = (uint64_t *)src;
const uint64_t *stop = (uint64_t *)(src + pipelinestat_block_size);
if (flags & VK_QUERY_RESULT_64_BIT) {
if (pool->pipeline_stats_mask & (1u << i)) {
if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
*dst = stop[pipeline_statistics_indices[i]] - start[pipeline_statistics_indices[i]];
-
- if (pool->uses_gds && (1u << i) == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) {
- *dst += ngg_gds_result;
- }
}
dst++;
}
if (pool->pipeline_stats_mask & (1u << i)) {
if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
*dst = stop[pipeline_statistics_indices[i]] - start[pipeline_statistics_indices[i]];
-
- if (pool->uses_gds && (1u << i) == VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT) {
- *dst += ngg_gds_result;
- }
}
dst++;
}
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline, pool->bo,
dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride,
dst_size, queryCount, flags, pool->pipeline_stats_mask,
- pool->availability_offset + 4 * firstQuery, pool->uses_gds);
+ pool->availability_offset + 4 * firstQuery, false);
break;
case VK_QUERY_TYPE_TIMESTAMP:
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
radeon_emit(cs, va >> 32);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
- unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(cmd_buffer->device);
-
radeon_check_space(cmd_buffer->device->ws, cs, 4);
++cmd_buffer->state.active_pipeline_queries;
radeon_emit(cs, va >> 32);
if (pool->uses_gds) {
- va += pipelinestat_block_size * 2;
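+ /* Copy the shader-emulated (GDS) NGG GS primitives count directly into its slot in the begin block. */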
+ uint32_t gs_prim_offset =
+ radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
+
+ va += gs_prim_offset;
/* pipeline statistics counter for all streams */
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PIPELINE_STAT_OFFSET, va);
EOP_DATA_SEL_VALUE_32BIT, avail_va, 1, cmd_buffer->gfx9_eop_bug_va);
if (pool->uses_gds) {
- va += pipelinestat_block_size + 8;
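+ /* va already points at the end block here, so only the per-statistic offset is added. */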
+ uint32_t gs_prim_offset =
+ radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
+
+ va += gs_prim_offset;
/* pipeline statistics counter for all streams */
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PIPELINE_STAT_OFFSET, va);