From 19eec024d2acc3bb8c39a7c3ce7ca76e34191c62 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 13 Sep 2022 14:54:46 +0200 Subject: [PATCH] radv,aco: do not compact MRTs if the pipeline uses a PS epilog We can't detect color attachment without exports when compiling a PS epilog, so we can't compact MRTs. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 8 ++--- src/amd/vulkan/radv_pipeline.c | 46 ++++++++++++++++++-------- src/amd/vulkan/radv_shader_args.c | 5 +-- 3 files changed, 36 insertions(+), 23 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 137c2c0..1a3a2f5 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -12354,7 +12354,6 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade Builder bld(ctx.program, ctx.block); /* Export all color render targets */ - unsigned compacted_mrt_index = 0; bool exported = false; for (unsigned i = 0; i < 8; i++) { @@ -12365,7 +12364,7 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade struct mrt_color_export out; - out.slot = compacted_mrt_index; + out.slot = i; out.write_mask = 0xf; out.col_format = col_format; out.is_int8 = (key->color_is_int8 >> i) & 1; @@ -12377,10 +12376,7 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade out.values[c] = Operand(emit_extract_vector(&ctx, inputs, c, v1)); } - if (export_fs_mrt_color(&ctx, &out, true)) { - compacted_mrt_index++; - exported = true; - } + exported |= export_fs_mrt_color(&ctx, &out, true); } if (!exported) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 1eb3d5d..e8a1dff 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -576,7 +576,8 @@ radv_compact_spi_shader_col_format(const struct 
radv_shader *ps, static void radv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pipeline, struct radv_blend_state *blend, - const struct vk_graphics_pipeline_state *state) + const struct vk_graphics_pipeline_state *state, + bool has_ps_epilog) { unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0; @@ -611,6 +612,19 @@ radv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pip col_format |= V_028714_SPI_SHADER_32_AR; } + if (has_ps_epilog) { + /* Do not compact MRTs when the pipeline uses a PS epilog because we can't detect color + * attachments without exports. Without compaction and if the i-th target format is set, all + * previous target formats must be non-zero to avoid hangs. + */ + unsigned num_targets = (util_last_bit(col_format) + 3) / 4; + for (unsigned i = 0; i < num_targets; i++) { + if (!(col_format & (0xfu << (i * 4)))) { + col_format |= V_028714_SPI_SHADER_32_R << (i * 4); + } + } + } + /* The output for dual source blending should have the same format as * the first output. 
*/ @@ -699,7 +713,8 @@ radv_blend_check_commutativity(struct radv_blend_state *blend, VkBlendOp op, VkB static struct radv_blend_state radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, - const struct vk_graphics_pipeline_state *state) + const struct vk_graphics_pipeline_state *state, + bool has_ps_epilog) { const struct radv_device *device = pipeline->base.device; struct radv_blend_state blend = {0}; @@ -861,7 +876,7 @@ radv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, else cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE); - radv_pipeline_compute_spi_color_formats(pipeline, &blend, state); + radv_pipeline_compute_spi_color_formats(pipeline, &blend, state, has_ps_epilog); pipeline->cb_color_control = cb_color_control; @@ -6083,7 +6098,8 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv radv_pipeline_layout_hash(&pipeline_layout); - struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, &state); + struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, &state, + pipeline->ps_epilog); const VkPipelineCreationFeedbackCreateInfo *creation_feedback = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); @@ -6121,7 +6137,9 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv gfx103_pipeline_init_vrs_state(pipeline, &state); struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; - blend.spi_shader_col_format = radv_compact_spi_shader_col_format(ps, &blend); + if (!ps->info.ps.has_epilog) { + blend.spi_shader_col_format = radv_compact_spi_shader_col_format(ps, &blend); + } /* Ensure that some export memory is always allocated, for two reasons: * @@ -6144,12 +6162,14 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R; } - /* In presense of MRT holes (ie. 
the FS exports MRT1 but not MRT0), the compiler will remap them, - * so that only MRT0 is exported and the driver will compact SPI_SHADER_COL_FORMAT to match what - * the FS actually exports. Though, to make sure the hw remapping works as expected, we should - * also clear color attachments without exports in CB_SHADER_MASK. - */ - blend.cb_shader_mask &= ps->info.ps.colors_written; + if (!ps->info.ps.has_epilog) { + /* In presence of MRT holes (i.e. the FS exports MRT1 but not MRT0), the compiler will remap + * them, so that only MRT0 is exported and the driver will compact SPI_SHADER_COL_FORMAT to + * match what the FS actually exports. Though, to make sure the hw remapping works as + * expected, we should also clear color attachments without exports in CB_SHADER_MASK. + */ + blend.cb_shader_mask &= ps->info.ps.colors_written; + } pipeline->col_format = blend.spi_shader_col_format; pipeline->cb_target_mask = blend.cb_target_mask; @@ -6275,7 +6295,7 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, if ((imported_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) && !(imported_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) { struct radv_blend_state blend = - radv_pipeline_init_blend_state(&pipeline->base, state); + radv_pipeline_init_blend_state(&pipeline->base, state, true); struct radv_pipeline_key key = radv_generate_graphics_pipeline_key(&pipeline->base, pCreateInfo, state, &blend); @@ -6297,7 +6317,7 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); struct radv_blend_state blend = - radv_pipeline_init_blend_state(&pipeline->base, state); + radv_pipeline_init_blend_state(&pipeline->base, state, pipeline->base.ps_epilog); struct radv_pipeline_key key = radv_generate_graphics_pipeline_key(&pipeline->base, pCreateInfo, state, &blend); diff --git a/src/amd/vulkan/radv_shader_args.c 
b/src/amd/vulkan/radv_shader_args.c index 8bdae45..8719a0f 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -953,8 +953,6 @@ void radv_declare_ps_epilog_args(enum amd_gfx_level gfx_level, const struct radv_ps_epilog_key *key, struct radv_shader_args *args) { - unsigned num_inputs = 0; - ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets); if (gfx_level < GFX11) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); @@ -966,7 +964,6 @@ radv_declare_ps_epilog_args(enum amd_gfx_level gfx_level, const struct radv_ps_e if (col_format == V_028714_SPI_SHADER_ZERO) continue; - ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, &args->ps_epilog_inputs[num_inputs]); - num_inputs++; + ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_FLOAT, &args->ps_epilog_inputs[i]); } } -- 2.7.4