}
/*
 * Byte strides of one primitives-generated query slot.
 *
 * RADV_PGQ_STRIDE_GDS is selected as the pool stride when the pool uses GDS
 * on hardware older than GFX11. It extends the base slot with two 64-bit
 * values: the start counter at byte offset 32 and the end counter at byte
 * offset 40. Each counter is copied from GDS into the low dword, and
 * 0x80000000 is written to the high dword (offsets 36 and 44) to flag the
 * value as available.
 *
 * NOTE(review): RADV_PGQ_STRIDE is presumably the stride used when GDS
 * counters are not needed - confirm against the pool creation code.
 */
#define RADV_PGQ_STRIDE 32
-#define RADV_PGQ_STRIDE_GDS (RADV_PGQ_STRIDE + 4 * 2)
+#define RADV_PGQ_STRIDE_GDS (RADV_PGQ_STRIDE + 8 * 2)
static nir_shader *
build_pg_query_shader(struct radv_device *device)
nir_def *avails[2];
avails[0] = nir_channel(&b, load1, 1);
avails[1] = nir_channel(&b, load2, 1);
- nir_def *result_is_available =
- nir_i2b(&b, nir_iand(&b, nir_iand(&b, avails[0], avails[1]), nir_imm_int(&b, 0x80000000)));
+ nir_store_var(&b, available, nir_i2b(&b, nir_iand_imm(&b, nir_iand(&b, avails[0], avails[1]), 0x80000000)), 0x1);
+
+ nir_push_if(&b, uses_gds);
+ {
+ nir_def *gds_avail_start = nir_load_ssbo(&b, 1, 32, src_buf, nir_iadd_imm(&b, input_base, 36), .align_mul = 4);
+ nir_def *gds_avail_end = nir_load_ssbo(&b, 1, 32, src_buf, nir_iadd_imm(&b, input_base, 44), .align_mul = 4);
+ nir_def *gds_result_available =
+ nir_i2b(&b, nir_iand_imm(&b, nir_iand(&b, gds_avail_start, gds_avail_end), 0x80000000));
+
+ nir_store_var(&b, available, nir_iand(&b, nir_load_var(&b, available), gds_result_available), 0x1);
+ }
+ nir_pop_if(&b, NULL);
/* Only compute result if available. */
- nir_push_if(&b, result_is_available);
+ nir_push_if(&b, nir_load_var(&b, available));
/* Pack values. */
nir_def *packed64[2];
nir_def *gds_start =
nir_load_ssbo(&b, 1, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 32)), .align_mul = 4);
nir_def *gds_end =
- nir_load_ssbo(&b, 1, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 36)), .align_mul = 4);
+ nir_load_ssbo(&b, 1, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 40)), .align_mul = 4);
nir_def *ngg_gds_result = nir_isub(&b, gds_end, gds_start);
}
nir_pop_if(&b, NULL);
- nir_store_var(&b, available, nir_imm_true(&b), 0x1);
-
nir_pop_if(&b, NULL);
/* Determine if result is 64 or 32 bit. */
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
if (pool->uses_gds && device->physical_device->rad_info.gfx_level < GFX11) {
/* When the hardware can use both the legacy and the NGG paths in the same begin/end pair,
- * allocate 2x32-bit values for the GDS counters.
+ * allocate 2x64-bit values for the GDS counters.
*/
pool->stride = RADV_PGQ_STRIDE_GDS;
} else {
break;
}
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
+ const bool uses_gds_query = pool->uses_gds && device->physical_device->rad_info.gfx_level < GFX11;
p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src;
uint64_t primitive_storage_needed;
!(p_atomic_read(src64 + 2) & 0x8000000000000000UL)) {
available = 0;
}
+ if (uses_gds_query && (!(p_atomic_read(src64 + 4) & 0x8000000000000000UL) ||
+ !(p_atomic_read(src64 + 5) & 0x8000000000000000UL))) {
+ available = 0;
+ }
} while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
primitive_storage_needed = src64[2] - src64[0];
- if (pool->uses_gds && device->physical_device->rad_info.gfx_level < GFX11) {
- uint32_t const *src32 = (uint32_t const *)src;
-
+ if (uses_gds_query) {
/* Accumulate the result that was copied from GDS in case NGG shader has been used. */
- primitive_storage_needed += src32[9] - src32[8];
+ primitive_storage_needed += src64[5] - src64[4];
}
if (flags & VK_QUERY_RESULT_64_BIT) {
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ const bool uses_gds_query = pool->uses_gds && cmd_buffer->device->physical_device->rad_info.gfx_level < GFX11;
+
for (unsigned i = 0; i < queryCount; i++) {
unsigned query = firstQuery + i;
uint64_t src_va = va + query * pool->stride;
- radeon_check_space(cmd_buffer->device->ws, cs, 7 * 2);
+ radeon_check_space(cmd_buffer->device->ws, cs, 7 * 4);
/* Wait on the upper word of the PrimitiveStorageNeeded result. */
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000, 0xffffffff);
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 20, 0x80000000, 0xffffffff);
+
+ if (uses_gds_query) {
+ radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 36, 0x80000000, 0xffffffff);
+ radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 44, 0x80000000, 0xffffffff);
+ }
}
}
if (pool->uses_gds) {
/* generated prim counter */
gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 32);
+ radv_cs_write_data_imm(cs, V_370_ME, va + 36, 0x80000000);
/* Record that the command buffer needs GDS. */
cmd_buffer->gds_needed = true;
if (pool->uses_gds) {
/* generated prim counter */
- gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 36);
+ gfx10_copy_gds_query(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 40);
+ radv_cs_write_data_imm(cs, V_370_ME, va + 44, 0x80000000);
cmd_buffer->state.active_prims_gen_gds_queries--;