From cd59c22325293d17ed76bc803d927407ae0a7abe Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 16 Nov 2020 08:57:59 +0100 Subject: [PATCH] ac,radv: use better export formats for 8-bit when RB+ isn't allowed When RB+ is enabled, R8_UINT/R8_SINT/R8_UNORM should use FP16_ABGR for 2x exporting performance. Otherwise, use 32_R to remove useless instructions needed for 16-bit compressed exports. fossils-db (Vega10): Totals from 8858 (6.35% of 139517) affected shaders: SGPRs: 801248 -> 801210 (-0.00%); split: -0.01%, +0.00% VGPRs: 596224 -> 596120 (-0.02%); split: -0.02%, +0.01% CodeSize: 71462452 -> 71356684 (-0.15%); split: -0.15%, +0.00% MaxWaves: 37097 -> 37105 (+0.02%); split: +0.04%, -0.02% Instrs: 13963177 -> 13950809 (-0.09%); split: -0.09%, +0.00% Cycles: 1476539360 -> 1476489996 (-0.00%); split: -0.00%, +0.00% VMEM: 2363008 -> 2361349 (-0.07%); split: +0.04%, -0.11% SMEM: 550362 -> 549977 (-0.07%); split: +0.01%, -0.08% VClause: 245704 -> 245727 (+0.01%); split: -0.01%, +0.02% SClause: 485161 -> 485104 (-0.01%); split: -0.01%, +0.00% Copies: 1420034 -> 1422310 (+0.16%); split: -0.01%, +0.17% Branches: 518710 -> 518705 (-0.00%) PreSGPRs: 706633 -> 706584 (-0.01%) PreVGPRs: 547163 -> 547007 (-0.03%); split: -0.03%, +0.01% Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/common/ac_shader_util.c | 12 +++++++++++- src/amd/common/ac_shader_util.h | 3 ++- src/amd/vulkan/radv_meta_blit.c | 4 ++-- src/amd/vulkan/radv_meta_blit2d.c | 4 ++-- src/amd/vulkan/radv_meta_clear.c | 4 ++-- src/amd/vulkan/radv_meta_resolve.c | 8 ++++---- src/amd/vulkan/radv_meta_resolve_fs.c | 6 +++--- src/amd/vulkan/radv_pipeline.c | 34 +++++++++++++++++++++++---------- src/amd/vulkan/radv_private.h | 2 +- src/gallium/drivers/radeonsi/si_state.c | 2 +- 10 files changed, 52 insertions(+), 27 deletions(-) diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index a57b5cac..9243219 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -316,7 +316,8 @@ unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config, return num_input_vgprs; } -void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, bool is_depth, +void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, + bool is_depth, bool use_rbplus, struct ac_spi_color_formats *formats) { /* Alpha is needed for alpha-to-coverage. @@ -349,6 +350,15 @@ void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR; else alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR; + + if (!use_rbplus && format == V_028C70_COLOR_8 && + ntype != V_028C70_NUMBER_SRGB && swap == V_028C70_SWAP_STD) /* R */ { + /* When RB+ is enabled, R8_UNORM should use FP16_ABGR for 2x + * exporting performance. Otherwise, use 32_R to remove useless + * instructions needed for 16-bit compressed exports. + */ + blend = normal = V_028714_SPI_SHADER_32_R; + } break; case V_028C70_COLOR_16: diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h index 8afee13..5f52eb9 100644 --- a/src/amd/common/ac_shader_util.h +++ b/src/amd/common/ac_shader_util.h @@ -97,7 +97,8 @@ enum ac_image_dim ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config, signed char *face_vgpr_index, signed char *ancillary_vgpr_index); -void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, bool is_depth, +void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, + bool is_depth, bool use_rbplus, struct ac_spi_color_formats *formats); #ifdef __cplusplus diff --git a/src/amd/vulkan/radv_meta_blit.c b/src/amd/vulkan/radv_meta_blit.c index f2bec34..6765760 100644 --- a/src/amd/vulkan/radv_meta_blit.c +++ b/src/amd/vulkan/radv_meta_blit.c @@ -327,7 +327,7 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, switch (src_iview->aspect_mask) { case VK_IMAGE_ASPECT_COLOR_BIT: { unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout); - fs_key = radv_format_meta_fs_key(dest_image->vk_format); + fs_key = radv_format_meta_fs_key(device, dest_image->vk_format); radv_cmd_buffer_begin_render_pass(cmd_buffer, &(VkRenderPassBeginInfo) { @@ -964,7 +964,7 @@ radv_device_init_meta_blit_color(struct radv_device *device, bool on_demand) VkResult result; for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) { - unsigned key = radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]); + unsigned key = radv_format_meta_fs_key(device, radv_fs_key_format_exemplars[i]); for(unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) { VkImageLayout layout = radv_meta_dst_layout_to_layout(j); result = radv_CreateRenderPass(radv_device_to_handle(device), diff --git a/src/amd/vulkan/radv_meta_blit2d.c b/src/amd/vulkan/radv_meta_blit2d.c index 536ebb4..91d9edc 100644 --- a/src/amd/vulkan/radv_meta_blit2d.c +++ b/src/amd/vulkan/radv_meta_blit2d.c @@ -288,7 +288,7 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer, aspect_mask == VK_IMAGE_ASPECT_PLANE_0_BIT || aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT || aspect_mask == VK_IMAGE_ASPECT_PLANE_2_BIT) { - unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format); + unsigned fs_key = radv_format_meta_fs_key(device, dst_temps.iview.vk_format); unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout); if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) { @@ -723,7 +723,7 @@ blit2d_init_color_pipeline(struct radv_device *device, uint32_t log2_samples) { VkResult result; - unsigned fs_key = radv_format_meta_fs_key(format); + unsigned fs_key = radv_format_meta_fs_key(device, format); const char *name; mtx_lock(&device->meta_state.mtx); diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c index 016aa34..0ad283f 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ b/src/amd/vulkan/radv_meta_clear.c @@ -418,7 +418,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer, } samples_log2 = ffs(samples) - 1; - fs_key = radv_format_meta_fs_key(format); + fs_key = radv_format_meta_fs_key(device, format); if (fs_key == -1) { radv_finishme("color clears incomplete"); @@ -1346,7 +1346,7 @@ radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand) uint32_t samples = 1 << i; for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) { VkFormat format = radv_fs_key_format_exemplars[j]; - unsigned fs_key = radv_format_meta_fs_key(format); + unsigned fs_key = radv_format_meta_fs_key(device, format); assert(!state->clear[i].color_pipelines[fs_key]); res = create_color_renderpass(device, format, samples, diff --git a/src/amd/vulkan/radv_meta_resolve.c b/src/amd/vulkan/radv_meta_resolve.c index 7a117c0..4269b9b 100644 --- a/src/amd/vulkan/radv_meta_resolve.c +++ b/src/amd/vulkan/radv_meta_resolve.c @@ -285,7 +285,7 @@ radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand) for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) { VkFormat format = radv_fs_key_format_exemplars[i]; - unsigned fs_key = radv_format_meta_fs_key(format); + unsigned fs_key = radv_format_meta_fs_key(device, format); res = create_pass(device, format, &state->resolve.pass[fs_key]); if (res != VK_SUCCESS) goto fail; @@ -316,7 +316,7 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer, { struct radv_device *device = cmd_buffer->device; VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); - unsigned fs_key = radv_format_meta_fs_key(vk_format); + unsigned fs_key = radv_format_meta_fs_key(device, vk_format); cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; @@ -442,7 +442,7 @@ radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer, if (src_image->info.array_size > 1) radv_finishme("vkCmdResolveImage: multisample array images"); - unsigned fs_key = radv_format_meta_fs_key(dst_image->vk_format); + unsigned fs_key = radv_format_meta_fs_key(device, dst_image->vk_format); /* From the Vulkan 1.0 spec: * @@ -859,7 +859,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer) radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass); - VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(dest_iview->vk_format)); + VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dest_iview->vk_format)); if (ret != VK_SUCCESS) { cmd_buffer->record_result = ret; continue; diff --git a/src/amd/vulkan/radv_meta_resolve_fs.c b/src/amd/vulkan/radv_meta_resolve_fs.c index 6aac4ce..d282b66 100644 --- a/src/amd/vulkan/radv_meta_resolve_fs.c +++ b/src/amd/vulkan/radv_meta_resolve_fs.c @@ -152,7 +152,7 @@ create_resolve_pipeline(struct radv_device *device, { mtx_lock(&device->meta_state.mtx); - unsigned fs_key = radv_format_meta_fs_key(format); + unsigned fs_key = radv_format_meta_fs_key(device, format); VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key]; if (*pipeline) { mtx_unlock(&device->meta_state.mtx); @@ -818,7 +818,7 @@ radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *dst_iview) { struct radv_device *device = cmd_buffer->device; - unsigned fs_key = radv_format_meta_fs_key(dst_iview->vk_format); + unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dst_iview->vk_format); const uint32_t samples = src_iview->image->info.samples; const uint32_t samples_log2 = ffs(samples) - 1; VkPipeline *pipeline; @@ -1022,7 +1022,7 @@ void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_state saved_state; const uint32_t samples = src_image->info.samples; const uint32_t samples_log2 = ffs(samples) - 1; - unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format); + unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dest_image->vk_format); unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout); VkRenderPass rp; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 3d5b47d..e31c6d8 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -451,11 +451,13 @@ static bool is_dual_src(VkBlendFactor factor) } } -static unsigned radv_choose_spi_color_format(VkFormat vk_format, +static unsigned radv_choose_spi_color_format(const struct radv_device *device, + VkFormat vk_format, bool blend_enable, bool blend_need_alpha) { const struct vk_format_description *desc = vk_format_description(vk_format); + bool use_rbplus = device->physical_device->rad_info.rbplus_allowed; struct ac_spi_color_formats formats = {0}; unsigned format, ntype, swap; @@ -464,7 +466,8 @@ static unsigned radv_choose_spi_color_format(VkFormat vk_format, vk_format_get_first_non_void_channel(vk_format)); swap = radv_translate_colorswap(vk_format, false); - ac_choose_spi_color_formats(format, swap, ntype, false, &formats); + ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus, + &formats); if (blend_enable && blend_need_alpha) return formats.blend_alpha; @@ -521,7 +524,8 @@ radv_pipeline_compute_spi_color_formats(const struct radv_pipeline *pipeline, bool blend_enable = blend->blend_enable_4bit & (0xfu << (i * 4)); - cf = radv_choose_spi_color_format(attachment->format, + cf = radv_choose_spi_color_format(pipeline->device, + attachment->format, blend_enable, blend->need_src_alpha & (1 << i)); @@ -584,19 +588,29 @@ const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = { VK_FORMAT_A2R10G10B10_SINT_PACK32, }; -unsigned radv_format_meta_fs_key(VkFormat format) +unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format) { - unsigned col_format = radv_choose_spi_color_format(format, false, false); - + unsigned col_format = radv_choose_spi_color_format(device, format, false, false); assert(col_format != V_028714_SPI_SHADER_32_AR); - if (col_format >= V_028714_SPI_SHADER_32_AR) - --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */ - --col_format; /* Skip V_028714_SPI_SHADER_ZERO */ bool is_int8 = format_is_int8(format); bool is_int10 = format_is_int10(format); - return col_format + (is_int8 ? 3 : is_int10 ? 5 : 0); + if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8) + return 8; + else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8) + return 9; + else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10) + return 10; + else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10) + return 11; + else { + if (col_format >= V_028714_SPI_SHADER_32_AR) + --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */ + + --col_format; /* Skip V_028714_SPI_SHADER_ZERO */ + return col_format; + } } static void diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 44c9bf6..6ef7801 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1680,7 +1680,7 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) __tmp &= ~(1 << (stage))) extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS]; -unsigned radv_format_meta_fs_key(VkFormat format); +unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format); struct radv_multisample_state { uint32_t db_eqaa; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 47c19e2..399cd9c 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2213,7 +2213,7 @@ static void si_choose_spi_color_formats(struct si_surface *surf, unsigned format { struct ac_spi_color_formats formats = {}; - ac_choose_spi_color_formats(format, swap, ntype, is_depth, &formats); + ac_choose_spi_color_formats(format, swap, ntype, is_depth, true, &formats); surf->spi_shader_col_format = formats.normal; surf->spi_shader_col_format_alpha = formats.alpha; -- 2.7.4