ac,radv: use better export formats for 8-bit when RB+ isn't allowed
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 16 Nov 2020 07:57:59 +0000 (08:57 +0100)
committer Marge Bot <eric+marge@anholt.net>
Mon, 23 Nov 2020 17:54:16 +0000 (17:54 +0000)
When RB+ is enabled, R8_UINT/R8_SINT/R8_UNORM should keep using the
16-bit compressed ABGR export formats (e.g. FP16_ABGR) for 2x exporting
performance. Otherwise, use 32_R to remove the useless instructions
needed for 16-bit compressed exports.

fossils-db (Vega10):
Totals from 8858 (6.35% of 139517) affected shaders:
SGPRs: 801248 -> 801210 (-0.00%); split: -0.01%, +0.00%
VGPRs: 596224 -> 596120 (-0.02%); split: -0.02%, +0.01%
CodeSize: 71462452 -> 71356684 (-0.15%); split: -0.15%, +0.00%
MaxWaves: 37097 -> 37105 (+0.02%); split: +0.04%, -0.02%
Instrs: 13963177 -> 13950809 (-0.09%); split: -0.09%, +0.00%
Cycles: 1476539360 -> 1476489996 (-0.00%); split: -0.00%, +0.00%
VMEM: 2363008 -> 2361349 (-0.07%); split: +0.04%, -0.11%
SMEM: 550362 -> 549977 (-0.07%); split: +0.01%, -0.08%
VClause: 245704 -> 245727 (+0.01%); split: -0.01%, +0.02%
SClause: 485161 -> 485104 (-0.01%); split: -0.01%, +0.00%
Copies: 1420034 -> 1422310 (+0.16%); split: -0.01%, +0.17%
Branches: 518710 -> 518705 (-0.00%)
PreSGPRs: 706633 -> 706584 (-0.01%)
PreVGPRs: 547163 -> 547007 (-0.03%); split: -0.03%, +0.01%

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7512>

src/amd/common/ac_shader_util.c
src/amd/common/ac_shader_util.h
src/amd/vulkan/radv_meta_blit.c
src/amd/vulkan/radv_meta_blit2d.c
src/amd/vulkan/radv_meta_clear.c
src/amd/vulkan/radv_meta_resolve.c
src/amd/vulkan/radv_meta_resolve_fs.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_private.h
src/gallium/drivers/radeonsi/si_state.c

index a57b5ca..9243219 100644 (file)
@@ -316,7 +316,8 @@ unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
    return num_input_vgprs;
 }
 
-void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, bool is_depth,
+void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype,
+                                 bool is_depth, bool use_rbplus,
                                  struct ac_spi_color_formats *formats)
 {
    /* Alpha is needed for alpha-to-coverage.
@@ -349,6 +350,15 @@ void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype,
          alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
       else
          alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
+
+      if (!use_rbplus && format == V_028C70_COLOR_8 &&
+          ntype != V_028C70_NUMBER_SRGB && swap == V_028C70_SWAP_STD) /* R */ {
+         /* When RB+ is enabled, R8_UNORM should use FP16_ABGR for 2x
+          * exporting performance. Otherwise, use 32_R to remove useless
+          * instructions needed for 16-bit compressed exports.
+          */
+         blend = normal = V_028714_SPI_SHADER_32_R;
+      }
       break;
 
    case V_028C70_COLOR_16:
index 8afee13..5f52eb9 100644 (file)
@@ -97,7 +97,8 @@ enum ac_image_dim ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler
 unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
                                   signed char *face_vgpr_index, signed char *ancillary_vgpr_index);
 
-void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, bool is_depth,
+void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype,
+                                 bool is_depth, bool use_rbplus,
                                  struct ac_spi_color_formats *formats);
 
 #ifdef __cplusplus
index f2bec34..6765760 100644 (file)
@@ -327,7 +327,7 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
        switch (src_iview->aspect_mask) {
        case VK_IMAGE_ASPECT_COLOR_BIT: {
                unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
-               fs_key = radv_format_meta_fs_key(dest_image->vk_format);
+               fs_key = radv_format_meta_fs_key(device, dest_image->vk_format);
 
                radv_cmd_buffer_begin_render_pass(cmd_buffer,
                                                  &(VkRenderPassBeginInfo) {
@@ -964,7 +964,7 @@ radv_device_init_meta_blit_color(struct radv_device *device, bool on_demand)
        VkResult result;
 
        for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
-               unsigned key = radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]);
+               unsigned key = radv_format_meta_fs_key(device, radv_fs_key_format_exemplars[i]);
                for(unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
                        VkImageLayout layout = radv_meta_dst_layout_to_layout(j);
                        result = radv_CreateRenderPass(radv_device_to_handle(device),
index 536ebb4..91d9edc 100644 (file)
@@ -288,7 +288,7 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
                            aspect_mask == VK_IMAGE_ASPECT_PLANE_0_BIT ||
                            aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT ||
                            aspect_mask == VK_IMAGE_ASPECT_PLANE_2_BIT) {
-                               unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
+                               unsigned fs_key = radv_format_meta_fs_key(device, dst_temps.iview.vk_format);
                                unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout);
 
                                if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) {
@@ -723,7 +723,7 @@ blit2d_init_color_pipeline(struct radv_device *device,
                           uint32_t log2_samples)
 {
        VkResult result;
-       unsigned fs_key = radv_format_meta_fs_key(format);
+       unsigned fs_key = radv_format_meta_fs_key(device, format);
        const char *name;
 
        mtx_lock(&device->meta_state.mtx);
index 016aa34..0ad283f 100644 (file)
@@ -418,7 +418,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
        }
 
        samples_log2 = ffs(samples) - 1;
-       fs_key = radv_format_meta_fs_key(format);
+       fs_key = radv_format_meta_fs_key(device, format);
 
        if (fs_key == -1) {
                radv_finishme("color clears incomplete");
@@ -1346,7 +1346,7 @@ radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
                uint32_t samples = 1 << i;
                for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
                        VkFormat format = radv_fs_key_format_exemplars[j];
-                       unsigned fs_key = radv_format_meta_fs_key(format);
+                       unsigned fs_key = radv_format_meta_fs_key(device, format);
                        assert(!state->clear[i].color_pipelines[fs_key]);
 
                        res = create_color_renderpass(device, format, samples,
index 7a117c0..4269b9b 100644 (file)
@@ -285,7 +285,7 @@ radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand)
 
        for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) {
                VkFormat format = radv_fs_key_format_exemplars[i];
-               unsigned fs_key = radv_format_meta_fs_key(format);
+               unsigned fs_key = radv_format_meta_fs_key(device, format);
                res = create_pass(device, format, &state->resolve.pass[fs_key]);
                if (res != VK_SUCCESS)
                        goto fail;
@@ -316,7 +316,7 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 {
        struct radv_device *device = cmd_buffer->device;
        VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-       unsigned fs_key = radv_format_meta_fs_key(vk_format);
+       unsigned fs_key = radv_format_meta_fs_key(device, vk_format);
 
        cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
 
@@ -442,7 +442,7 @@ radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer,
        if (src_image->info.array_size > 1)
                radv_finishme("vkCmdResolveImage: multisample array images");
 
-       unsigned fs_key = radv_format_meta_fs_key(dst_image->vk_format);
+       unsigned fs_key = radv_format_meta_fs_key(device, dst_image->vk_format);
 
        /* From the Vulkan 1.0 spec:
         *
@@ -859,7 +859,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
 
                radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
 
-               VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(dest_iview->vk_format));
+               VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dest_iview->vk_format));
                if (ret != VK_SUCCESS) {
                        cmd_buffer->record_result = ret;
                        continue;
index 6aac4ce..d282b66 100644 (file)
@@ -152,7 +152,7 @@ create_resolve_pipeline(struct radv_device *device,
 {
        mtx_lock(&device->meta_state.mtx);
 
-       unsigned fs_key = radv_format_meta_fs_key(format);
+       unsigned fs_key = radv_format_meta_fs_key(device, format);
        VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
        if (*pipeline) {
                mtx_unlock(&device->meta_state.mtx);
@@ -818,7 +818,7 @@ radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer,
                          struct radv_image_view *dst_iview)
 {
        struct radv_device *device = cmd_buffer->device;
-       unsigned fs_key = radv_format_meta_fs_key(dst_iview->vk_format);
+       unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dst_iview->vk_format);
        const uint32_t samples = src_iview->image->info.samples;
        const uint32_t samples_log2 = ffs(samples) - 1;
        VkPipeline *pipeline;
@@ -1022,7 +1022,7 @@ void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
        struct radv_meta_saved_state saved_state;
        const uint32_t samples = src_image->info.samples;
        const uint32_t samples_log2 = ffs(samples) - 1;
-       unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format);
+       unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dest_image->vk_format);
        unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
        VkRenderPass rp;
 
index 3d5b47d..e31c6d8 100644 (file)
@@ -451,11 +451,13 @@ static bool is_dual_src(VkBlendFactor factor)
        }
 }
 
-static unsigned radv_choose_spi_color_format(VkFormat vk_format,
+static unsigned radv_choose_spi_color_format(const struct radv_device *device,
+                                            VkFormat vk_format,
                                             bool blend_enable,
                                             bool blend_need_alpha)
 {
        const struct vk_format_description *desc = vk_format_description(vk_format);
+       bool use_rbplus = device->physical_device->rad_info.rbplus_allowed;
        struct ac_spi_color_formats formats = {0};
        unsigned format, ntype, swap;
 
@@ -464,7 +466,8 @@ static unsigned radv_choose_spi_color_format(VkFormat vk_format,
                                               vk_format_get_first_non_void_channel(vk_format));
        swap = radv_translate_colorswap(vk_format, false);
 
-       ac_choose_spi_color_formats(format, swap, ntype, false, &formats);
+       ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus,
+                                   &formats);
 
        if (blend_enable && blend_need_alpha)
                return formats.blend_alpha;
@@ -521,7 +524,8 @@ radv_pipeline_compute_spi_color_formats(const struct radv_pipeline *pipeline,
                        bool blend_enable =
                                blend->blend_enable_4bit & (0xfu << (i * 4));
 
-                       cf = radv_choose_spi_color_format(attachment->format,
+                       cf = radv_choose_spi_color_format(pipeline->device,
+                                                         attachment->format,
                                                          blend_enable,
                                                          blend->need_src_alpha & (1 << i));
 
@@ -584,19 +588,29 @@ const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = {
        VK_FORMAT_A2R10G10B10_SINT_PACK32,
 };
 
-unsigned radv_format_meta_fs_key(VkFormat format)
+unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format)
 {
-       unsigned col_format = radv_choose_spi_color_format(format, false, false);
-
+       unsigned col_format = radv_choose_spi_color_format(device, format, false, false);
        assert(col_format != V_028714_SPI_SHADER_32_AR);
-       if (col_format >= V_028714_SPI_SHADER_32_AR)
-               --col_format; /* Skip V_028714_SPI_SHADER_32_AR  since there is no such VkFormat */
 
-       --col_format; /* Skip V_028714_SPI_SHADER_ZERO */
        bool is_int8 = format_is_int8(format);
        bool is_int10 = format_is_int10(format);
 
-       return col_format + (is_int8 ? 3 : is_int10 ? 5 : 0);
+       if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8)
+               return 8;
+       else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8)
+               return 9;
+       else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10)
+               return 10;
+       else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10)
+               return 11;
+       else {
+               if (col_format >= V_028714_SPI_SHADER_32_AR)
+                       --col_format; /* Skip V_028714_SPI_SHADER_32_AR  since there is no such VkFormat */
+
+               --col_format; /* Skip V_028714_SPI_SHADER_ZERO */
+               return col_format;
+       }
 }
 
 static void
index 44c9bf6..6ef7801 100644 (file)
@@ -1680,7 +1680,7 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
             __tmp &= ~(1 << (stage)))
 
 extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS];
-unsigned radv_format_meta_fs_key(VkFormat format);
+unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format);
 
 struct radv_multisample_state {
        uint32_t db_eqaa;
index 47c19e2..399cd9c 100644 (file)
@@ -2213,7 +2213,7 @@ static void si_choose_spi_color_formats(struct si_surface *surf, unsigned format
 {
    struct ac_spi_color_formats formats = {};
 
-   ac_choose_spi_color_formats(format, swap, ntype, is_depth, &formats);
+   ac_choose_spi_color_formats(format, swap, ntype, is_depth, true, &formats);
 
    surf->spi_shader_col_format = formats.normal;
    surf->spi_shader_col_format_alpha = formats.alpha;