When RB+ is enabled, R8_UINT/R8_SINT/R8_UNORM should keep using the
16-bit compressed ABGR export formats (e.g. FP16_ABGR for R8_UNORM) for
2x exporting performance. When RB+ is not enabled, use 32_R instead to
remove the useless instructions needed for 16-bit compressed exports.
fossils-db (Vega10):
Totals from 8858 (6.35% of 139517) affected shaders:
SGPRs: 801248 -> 801210 (-0.00%); split: -0.01%, +0.00%
VGPRs: 596224 -> 596120 (-0.02%); split: -0.02%, +0.01%
CodeSize: 71462452 -> 71356684 (-0.15%); split: -0.15%, +0.00%
MaxWaves: 37097 -> 37105 (+0.02%); split: +0.04%, -0.02%
Instrs: 13963177 -> 13950809 (-0.09%); split: -0.09%, +0.00%
Cycles: 1476539360 -> 1476489996 (-0.00%); split: -0.00%, +0.00%
VMEM: 2363008 -> 2361349 (-0.07%); split: +0.04%, -0.11%
SMEM: 550362 -> 549977 (-0.07%); split: +0.01%, -0.08%
VClause: 245704 -> 245727 (+0.01%); split: -0.01%, +0.02%
SClause: 485161 -> 485104 (-0.01%); split: -0.01%, +0.00%
Copies: 1420034 -> 1422310 (+0.16%); split: -0.01%, +0.17%
Branches: 518710 -> 518705 (-0.00%)
PreSGPRs: 706633 -> 706584 (-0.01%)
PreVGPRs: 547163 -> 547007 (-0.03%); split: -0.03%, +0.01%
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7512>
return num_input_vgprs;
}
-void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, bool is_depth,
+void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype,
+ bool is_depth, bool use_rbplus,
struct ac_spi_color_formats *formats)
{
/* Alpha is needed for alpha-to-coverage.
alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
else
alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
+
+ if (!use_rbplus && format == V_028C70_COLOR_8 &&
+ ntype != V_028C70_NUMBER_SRGB && swap == V_028C70_SWAP_STD) /* R */ {
+ /* When RB+ is enabled, R8_UNORM should use FP16_ABGR for 2x
+ * exporting performance. Otherwise, use 32_R to remove useless
+ * instructions needed for 16-bit compressed exports.
+ */
+ blend = normal = V_028714_SPI_SHADER_32_R;
+ }
break;
case V_028C70_COLOR_16:
unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
signed char *face_vgpr_index, signed char *ancillary_vgpr_index);
-void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, bool is_depth,
+void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype,
+ bool is_depth, bool use_rbplus,
struct ac_spi_color_formats *formats);
#ifdef __cplusplus
switch (src_iview->aspect_mask) {
case VK_IMAGE_ASPECT_COLOR_BIT: {
unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
- fs_key = radv_format_meta_fs_key(dest_image->vk_format);
+ fs_key = radv_format_meta_fs_key(device, dest_image->vk_format);
radv_cmd_buffer_begin_render_pass(cmd_buffer,
&(VkRenderPassBeginInfo) {
VkResult result;
for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
- unsigned key = radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]);
+ unsigned key = radv_format_meta_fs_key(device, radv_fs_key_format_exemplars[i]);
for(unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
VkImageLayout layout = radv_meta_dst_layout_to_layout(j);
result = radv_CreateRenderPass(radv_device_to_handle(device),
aspect_mask == VK_IMAGE_ASPECT_PLANE_0_BIT ||
aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT ||
aspect_mask == VK_IMAGE_ASPECT_PLANE_2_BIT) {
- unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
+ unsigned fs_key = radv_format_meta_fs_key(device, dst_temps.iview.vk_format);
unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout);
if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) {
uint32_t log2_samples)
{
VkResult result;
- unsigned fs_key = radv_format_meta_fs_key(format);
+ unsigned fs_key = radv_format_meta_fs_key(device, format);
const char *name;
mtx_lock(&device->meta_state.mtx);
}
samples_log2 = ffs(samples) - 1;
- fs_key = radv_format_meta_fs_key(format);
+ fs_key = radv_format_meta_fs_key(device, format);
if (fs_key == -1) {
radv_finishme("color clears incomplete");
uint32_t samples = 1 << i;
for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
VkFormat format = radv_fs_key_format_exemplars[j];
- unsigned fs_key = radv_format_meta_fs_key(format);
+ unsigned fs_key = radv_format_meta_fs_key(device, format);
assert(!state->clear[i].color_pipelines[fs_key]);
res = create_color_renderpass(device, format, samples,
for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) {
VkFormat format = radv_fs_key_format_exemplars[i];
- unsigned fs_key = radv_format_meta_fs_key(format);
+ unsigned fs_key = radv_format_meta_fs_key(device, format);
res = create_pass(device, format, &state->resolve.pass[fs_key]);
if (res != VK_SUCCESS)
goto fail;
{
struct radv_device *device = cmd_buffer->device;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- unsigned fs_key = radv_format_meta_fs_key(vk_format);
+ unsigned fs_key = radv_format_meta_fs_key(device, vk_format);
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
if (src_image->info.array_size > 1)
radv_finishme("vkCmdResolveImage: multisample array images");
- unsigned fs_key = radv_format_meta_fs_key(dst_image->vk_format);
+ unsigned fs_key = radv_format_meta_fs_key(device, dst_image->vk_format);
/* From the Vulkan 1.0 spec:
*
radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
- VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(dest_iview->vk_format));
+ VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dest_iview->vk_format));
if (ret != VK_SUCCESS) {
cmd_buffer->record_result = ret;
continue;
{
mtx_lock(&device->meta_state.mtx);
- unsigned fs_key = radv_format_meta_fs_key(format);
+ unsigned fs_key = radv_format_meta_fs_key(device, format);
VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
if (*pipeline) {
mtx_unlock(&device->meta_state.mtx);
struct radv_image_view *dst_iview)
{
struct radv_device *device = cmd_buffer->device;
- unsigned fs_key = radv_format_meta_fs_key(dst_iview->vk_format);
+ unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dst_iview->vk_format);
const uint32_t samples = src_iview->image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
VkPipeline *pipeline;
struct radv_meta_saved_state saved_state;
const uint32_t samples = src_image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
- unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format);
+ unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dest_image->vk_format);
unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
VkRenderPass rp;
}
}
-static unsigned radv_choose_spi_color_format(VkFormat vk_format,
+static unsigned radv_choose_spi_color_format(const struct radv_device *device,
+ VkFormat vk_format,
bool blend_enable,
bool blend_need_alpha)
{
const struct vk_format_description *desc = vk_format_description(vk_format);
+ bool use_rbplus = device->physical_device->rad_info.rbplus_allowed;
struct ac_spi_color_formats formats = {0};
unsigned format, ntype, swap;
vk_format_get_first_non_void_channel(vk_format));
swap = radv_translate_colorswap(vk_format, false);
- ac_choose_spi_color_formats(format, swap, ntype, false, &formats);
+ ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus,
+ &formats);
if (blend_enable && blend_need_alpha)
return formats.blend_alpha;
bool blend_enable =
blend->blend_enable_4bit & (0xfu << (i * 4));
- cf = radv_choose_spi_color_format(attachment->format,
+ cf = radv_choose_spi_color_format(pipeline->device,
+ attachment->format,
blend_enable,
blend->need_src_alpha & (1 << i));
VK_FORMAT_A2R10G10B10_SINT_PACK32,
};
-unsigned radv_format_meta_fs_key(VkFormat format)
+unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format)
{
- unsigned col_format = radv_choose_spi_color_format(format, false, false);
-
+ unsigned col_format = radv_choose_spi_color_format(device, format, false, false);
assert(col_format != V_028714_SPI_SHADER_32_AR);
- if (col_format >= V_028714_SPI_SHADER_32_AR)
- --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */
- --col_format; /* Skip V_028714_SPI_SHADER_ZERO */
bool is_int8 = format_is_int8(format);
bool is_int10 = format_is_int10(format);
- return col_format + (is_int8 ? 3 : is_int10 ? 5 : 0);
+ if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8)
+ return 8;
+ else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8)
+ return 9;
+ else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10)
+ return 10;
+ else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10)
+ return 11;
+ else {
+ if (col_format >= V_028714_SPI_SHADER_32_AR)
+ --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */
+
+ --col_format; /* Skip V_028714_SPI_SHADER_ZERO */
+ return col_format;
+ }
}
static void
__tmp &= ~(1 << (stage)))
extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS];
-unsigned radv_format_meta_fs_key(VkFormat format);
+unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format);
struct radv_multisample_state {
uint32_t db_eqaa;
{
struct ac_spi_color_formats formats = {};
- ac_choose_spi_color_formats(format, swap, ntype, is_depth, &formats);
+ ac_choose_spi_color_formats(format, swap, ntype, is_depth, true, &formats);
surf->spi_shader_col_format = formats.normal;
surf->spi_shader_col_format_alpha = formats.alpha;