From 6e2e89e6d8870b6c29d1696b4dffd3de713ae494 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 18 Feb 2023 03:40:41 -0500 Subject: [PATCH] amd,radeonsi: change enabled_rb_mask to 64 bits Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_gpu_info.c | 11 +++++++---- src/amd/common/ac_gpu_info.h | 2 +- src/amd/vulkan/radv_query.c | 15 ++++++++------- src/amd/vulkan/si_cmd_buffer.c | 4 ++-- src/gallium/drivers/r600/r600_pipe_common.c | 2 +- src/gallium/drivers/radeonsi/si_query.c | 4 ++-- src/gallium/drivers/radeonsi/si_state.c | 6 +++--- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 9 ++++++--- 8 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index ce38270..59c110e 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -1222,7 +1222,10 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info) (info->num_cu / (info->num_se * info->max_sa_per_se * cu_group)) * cu_group; memcpy(info->si_tile_mode_array, amdinfo.gb_tile_mode, sizeof(amdinfo.gb_tile_mode)); - info->enabled_rb_mask = amdinfo.enabled_rb_pipes_mask; + + info->enabled_rb_mask = device_info.enabled_rb_pipes_mask; + if (info->drm_minor >= 52) + info->enabled_rb_mask |= (uint64_t)device_info.enabled_rb_pipes_mask_hi << 32; memcpy(info->cik_macrotile_mode_array, amdinfo.gb_macro_tile_mode, sizeof(amdinfo.gb_macro_tile_mode)); @@ -1324,7 +1327,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info) info->family == CHIP_NAVI24 || info->family == CHIP_REMBRANDT || info->family == CHIP_VANGOGH) && - util_bitcount(info->enabled_rb_mask) != + util_bitcount64(info->enabled_rb_mask) != info->max_render_backends; /* On GFX10.3, the polarity of AUTO_FLUSH_MODE is inverted. 
*/ @@ -1374,7 +1377,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info) const unsigned max_waves_per_tg = 32; /* 1024 threads in Wave32 */ info->max_scratch_waves = MAX2(32 * info->min_good_cu_per_sa * info->max_sa_per_se * info->num_se, max_waves_per_tg); - info->num_rb = util_bitcount(info->enabled_rb_mask); + info->num_rb = util_bitcount64(info->enabled_rb_mask); info->max_gflops = (info->gfx_level >= GFX11 ? 256 : 128) * info->num_cu * info->max_gpu_freq_mhz / 1000; info->memory_bandwidth_gbps = DIV_ROUND_UP(info->memory_freq_mhz_effective * info->memory_bus_width / 8, 1000); info->has_pcie_bandwidth_info = info->drm_minor >= 51; @@ -1697,7 +1700,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f) fprintf(f, " max_render_backends = %i\n", info->max_render_backends); fprintf(f, " num_tile_pipes = %i\n", info->num_tile_pipes); fprintf(f, " pipe_interleave_bytes = %i\n", info->pipe_interleave_bytes); - fprintf(f, " enabled_rb_mask = 0x%x\n", info->enabled_rb_mask); + fprintf(f, " enabled_rb_mask = 0x%" PRIx64 "\n", info->enabled_rb_mask); fprintf(f, " max_alignment = %u\n", (unsigned)info->max_alignment); fprintf(f, " pbb_max_alloc_count = %u\n", info->pbb_max_alloc_count); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 28cf6cf..c5b1179 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -257,7 +257,7 @@ struct radeon_info { uint32_t max_render_backends; /* number of render backends incl. 
disabled ones */ uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */ uint32_t pipe_interleave_bytes; - uint32_t enabled_rb_mask; /* GCN harvest config */ + uint64_t enabled_rb_mask; /* bitmask of enabled physical RBs, up to max_render_backends bits */ uint64_t max_alignment; /* from addrlib */ uint32_t pbb_max_alloc_count; diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 217e7cc..67ba3a8 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -90,7 +90,7 @@ build_occlusion_query_shader(struct radv_device *device) * uint64_t dst_offset = dst_stride * global_id.x; * bool available = true; * for (int i = 0; i < db_count; ++i) { - * if (enabled_rb_mask & (1 << i)) { + * if (enabled_rb_mask & BITFIELD64_BIT(i)) { * uint64_t start = src_buf[src_offset + 16 * i]; * uint64_t end = src_buf[src_offset + 16 * i + 8]; * if ((start & (1ull << 63)) && (end & (1ull << 63))) @@ -120,7 +120,7 @@ build_occlusion_query_shader(struct radv_device *device) nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start"); nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end"); nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available"); - unsigned enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask; + uint64_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask; unsigned db_count = device->physical_device->rad_info.max_render_backends; nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4); @@ -145,7 +145,8 @@ build_occlusion_query_shader(struct radv_device *device) radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count)); nir_ssa_def *enabled_cond = - nir_iand_imm(&b, nir_ishl(&b, nir_imm_int(&b, 1), current_outer_count), enabled_rb_mask); + nir_iand_imm(&b, nir_ishl(&b, nir_imm_int64(&b, 1), current_outer_count), + enabled_rb_mask); nir_push_if(&b, nir_i2b(&b, enabled_cond)); @@ 
-1242,14 +1243,14 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first case VK_QUERY_TYPE_OCCLUSION: { uint64_t const *src64 = (uint64_t const *)src; uint32_t db_count = device->physical_device->rad_info.max_render_backends; - uint32_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask; + uint64_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask; uint64_t sample_count = 0; available = 1; for (int i = 0; i < db_count; ++i) { uint64_t start, end; - if (!(enabled_rb_mask & (1 << i))) + if (!(enabled_rb_mask & (1ull << i))) continue; do { @@ -1534,8 +1535,8 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo switch (pool->type) { case VK_QUERY_TYPE_OCCLUSION: if (flags & VK_QUERY_RESULT_WAIT_BIT) { - unsigned enabled_rb_mask = cmd_buffer->device->physical_device->rad_info.enabled_rb_mask; - uint32_t rb_avail_offset = 16 * util_last_bit(enabled_rb_mask) - 4; + uint64_t enabled_rb_mask = cmd_buffer->device->physical_device->rad_info.enabled_rb_mask; + uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4; for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) { unsigned query = firstQuery + i; uint64_t src_va = va + query * pool->stride + rb_avail_offset; diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index ff5443b..6641cc8 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -175,7 +175,7 @@ static void si_set_raster_config(struct radv_physical_device *physical_device, struct radeon_cmdbuf *cs) { unsigned num_rb = MIN2(physical_device->rad_info.max_render_backends, 16); - unsigned rb_mask = physical_device->rad_info.enabled_rb_mask; + uint64_t rb_mask = physical_device->rad_info.enabled_rb_mask; unsigned raster_config, raster_config_1; ac_get_raster_config(&physical_device->rad_info, &raster_config, &raster_config_1, NULL); @@ -183,7 +183,7 @@ si_set_raster_config(struct radv_physical_device 
*physical_device, struct radeon /* Always use the default config when all backends are enabled * (or when we failed to determine the enabled backends). */ - if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { + if (!rb_mask || util_bitcount64(rb_mask) >= num_rb) { radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config); if (physical_device->rad_info.gfx_level >= GFX7) radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index de9d2e2..02a2a78 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -1320,7 +1320,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, printf("num_render_backends = %i\n", rscreen->info.max_render_backends); printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes); printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes); - printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask); + printf("enabled_rb_mask = 0x%" PRIx64 "\n", rscreen->info.enabled_rb_mask); printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment); } diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index cd7f250..d5135c6 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -617,7 +617,7 @@ static bool si_query_hw_prepare_buffer(struct si_context *sctx, struct si_query_ query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE || query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) { unsigned max_rbs = screen->info.max_render_backends; - unsigned enabled_rb_mask = screen->info.enabled_rb_mask; + uint64_t enabled_rb_mask = screen->info.enabled_rb_mask; unsigned num_results; unsigned i, j; @@ -625,7 +625,7 @@ static bool si_query_hw_prepare_buffer(struct si_context *sctx, struct si_query_ num_results = qbuf->buf->b.b.width0 / 
query->result_size; for (j = 0; j < num_results; j++) { for (i = 0; i < max_rbs; i++) { - if (!(enabled_rb_mask & (1 << i))) { + if (!(enabled_rb_mask & (1ull << i))) { results[(i * 4) + 1] = 0x80000000; results[(i * 4) + 3] = 0x80000000; } diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 119c9ac..72e6451 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2433,7 +2433,7 @@ static bool si_is_format_supported(struct pipe_screen *screen, enum pipe_format /* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate, * so don't expose 16 samples there. */ - const unsigned max_eqaa_samples = util_bitcount(sscreen->info.enabled_rb_mask) <= 1 ? 8 : 16; + const unsigned max_eqaa_samples = util_bitcount64(sscreen->info.enabled_rb_mask) <= 1 ? 8 : 16; const unsigned max_samples = 8; /* MSAA support without framebuffer attachments. */ @@ -5540,11 +5540,11 @@ static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *p { struct si_screen *sscreen = sctx->screen; unsigned num_rb = MIN2(sscreen->info.max_render_backends, 16); - unsigned rb_mask = sscreen->info.enabled_rb_mask; + uint64_t rb_mask = sscreen->info.enabled_rb_mask; unsigned raster_config = sscreen->pa_sc_raster_config; unsigned raster_config_1 = sscreen->pa_sc_raster_config_1; - if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { + if (!rb_mask || util_bitcount64(rb_mask) >= num_rb) { /* Always use the default config when all backends are enabled * (or when we failed to determine the enabled backends). 
*/ diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 2dca061..21c815f 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -443,9 +443,12 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) * This fails (silently) on non-GCN or older kernels, overwriting the * default enabled_rb_mask with the result of the last query. */ - if (ws->gen >= DRV_SI) - radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, - &ws->info.enabled_rb_mask); + if (ws->gen >= DRV_SI) { + uint32_t mask = (uint32_t)ws->info.enabled_rb_mask; /* seed with the default so a failed query cannot leave an indeterminate value */ + + radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, &mask); + ws->info.enabled_rb_mask = mask; + } ws->info.r600_has_virtual_memory = false; -- 2.7.4