From 80177e0296299cc46a7c78e25717c1a3c2d0d19d Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 15 Aug 2023 15:20:16 +0200 Subject: [PATCH] aco: add support for compiling VS+TCS separately on GFX9+ The VS will just jump to the TCS. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 53 +++++++++++++++++++++++++- src/amd/compiler/aco_shader_info.h | 1 + src/amd/vulkan/radv_aco_shader_info.h | 1 + 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 17e738c..83aad96 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -11445,6 +11445,36 @@ pops_await_overlapped_waves(isel_context* ctx) bld.reset(ctx->block); } +static void +create_vs_jump_to_tcs(isel_context* ctx) +{ + Builder bld(ctx->program, ctx->block); + std::vector regs; + + for (unsigned i = 0; i < ctx->args->arg_count; i++) { + if (!ctx->args->args[i].preserved) + continue; + + const enum ac_arg_regfile file = ctx->args->args[i].file; + const unsigned reg = ctx->args->args[i].offset; + + Operand op(ctx->arg_temps[i]); + op.setFixed(PhysReg{file == AC_ARG_SGPR ? reg : reg + 256}); + regs.emplace_back(op); + } + + Temp continue_pc = + convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.next_stage_pc)); + + aco_ptr jump{create_instruction( + aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + regs.size(), 0)}; + jump->operands[0] = Operand(continue_pc); + for (unsigned i = 0; i < regs.size(); i++) { + jump->operands[i + 1] = regs[i]; + } + ctx->block->instructions.emplace_back(std::move(jump)); +} + void select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, const bool need_barrier, if_context* ic_merged_wave_info, const bool check_merged_wave_info, @@ -11521,6 +11551,11 @@ select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, cons } } + if (ctx.stage.hw == AC_HW_HULL_SHADER && ctx.stage.sw == SWStage::VS) { + assert(program->gfx_level >= GFX9); + create_vs_jump_to_tcs(&ctx); + } + cleanup_context(&ctx); } @@ -11651,7 +11686,23 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const if (shader_count >= 2) { select_program_merged(ctx, shader_count, shaders); } else { - select_shader(ctx, shaders[0], true, false, NULL, false, false); + bool need_barrier = false, check_merged_wave_info = false, endif_merged_wave_info = false; + if_context ic_merged_wave_info; + + /* Handle separate compilation of VS+TCS on GFX9+. */ + if (!ctx.program->info.is_monolithic) { + assert(ctx.program->gfx_level >= GFX9); + if (ctx.stage.hw == AC_HW_HULL_SHADER && ctx.stage.sw == SWStage::VS) { + check_merged_wave_info = endif_merged_wave_info = true; + } else { + assert(ctx.stage == tess_control_hs); + check_merged_wave_info = endif_merged_wave_info = true; + need_barrier = true; + } + } + + select_shader(ctx, shaders[0], true, need_barrier, &ic_merged_wave_info, + check_merged_wave_info, endif_merged_wave_info); } program->config->float_mode = program->blocks[0].fp_mode.val; diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h index b713f1c..4a37e46 100644 --- a/src/amd/compiler/aco_shader_info.h +++ b/src/amd/compiler/aco_shader_info.h @@ -103,6 +103,7 @@ struct aco_shader_info { unsigned workgroup_size; bool has_epilog; /* Only for TCS or PS. */ bool is_monolithic; + struct ac_arg next_stage_pc; struct { bool tcs_in_out_eq; uint64_t tcs_temp_only_input_mask; diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h index a8ee61a..8daa209 100644 --- a/src/amd/vulkan/radv_aco_shader_info.h +++ b/src/amd/vulkan/radv_aco_shader_info.h @@ -68,6 +68,7 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv aco_info->hw_stage = radv_select_hw_stage(radv, gfx_level); aco_info->tcs.epilog_pc = radv_args->tcs_epilog_pc; aco_info->tcs.tcs_offchip_layout = radv_args->tcs_offchip_layout; + aco_info->next_stage_pc = radv_args->next_stage_pc; } #define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x -- 2.7.4