aco: add support for compiling VS+TCS separately on GFX9+
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 15 Aug 2023 13:20:16 +0000 (15:20 +0200)
committerMarge Bot <emma+marge@anholt.net>
Fri, 25 Aug 2023 07:22:04 +0000 (07:22 +0000)
The VS will just jump to the TCS.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24697>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_shader_info.h
src/amd/vulkan/radv_aco_shader_info.h

index 17e738c..83aad96 100644 (file)
@@ -11445,6 +11445,36 @@ pops_await_overlapped_waves(isel_context* ctx)
    bld.reset(ctx->block);
 }
 
+static void
+create_vs_jump_to_tcs(isel_context* ctx)
+{
+   Builder bld(ctx->program, ctx->block);
+   std::vector<Operand> regs;
+
+   for (unsigned i = 0; i < ctx->args->arg_count; i++) {
+      if (!ctx->args->args[i].preserved)
+         continue;
+
+      const enum ac_arg_regfile file = ctx->args->args[i].file;
+      const unsigned reg = ctx->args->args[i].offset;
+
+      Operand op(ctx->arg_temps[i]);
+      op.setFixed(PhysReg{file == AC_ARG_SGPR ? reg : reg + 256});
+      regs.emplace_back(op);
+   }
+
+   Temp continue_pc =
+      convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.next_stage_pc));
+
+   aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
+      aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + regs.size(), 0)};
+   jump->operands[0] = Operand(continue_pc);
+   for (unsigned i = 0; i < regs.size(); i++) {
+      jump->operands[i + 1] = regs[i];
+   }
+   ctx->block->instructions.emplace_back(std::move(jump));
+}
+
 void
 select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, const bool need_barrier,
               if_context* ic_merged_wave_info, const bool check_merged_wave_info,
@@ -11521,6 +11551,11 @@ select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, cons
       }
    }
 
+   if (ctx.stage.hw == AC_HW_HULL_SHADER && ctx.stage.sw == SWStage::VS) {
+      assert(program->gfx_level >= GFX9);
+      create_vs_jump_to_tcs(&ctx);
+   }
+
    cleanup_context(&ctx);
 }
 
@@ -11651,7 +11686,23 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
    if (shader_count >= 2) {
       select_program_merged(ctx, shader_count, shaders);
    } else {
-      select_shader(ctx, shaders[0], true, false, NULL, false, false);
+      bool need_barrier = false, check_merged_wave_info = false, endif_merged_wave_info = false;
+      if_context ic_merged_wave_info;
+
+      /* Handle separate compilation of VS+TCS on GFX9+. */
+      if (!ctx.program->info.is_monolithic) {
+         assert(ctx.program->gfx_level >= GFX9);
+         if (ctx.stage.hw == AC_HW_HULL_SHADER && ctx.stage.sw == SWStage::VS) {
+            check_merged_wave_info = endif_merged_wave_info = true;
+         } else {
+            assert(ctx.stage == tess_control_hs);
+            check_merged_wave_info = endif_merged_wave_info = true;
+            need_barrier = true;
+         }
+      }
+
+      select_shader(ctx, shaders[0], true, need_barrier, &ic_merged_wave_info,
+                    check_merged_wave_info, endif_merged_wave_info);
    }
 
    program->config->float_mode = program->blocks[0].fp_mode.val;
index b713f1c..4a37e46 100644 (file)
@@ -103,6 +103,7 @@ struct aco_shader_info {
    unsigned workgroup_size;
    bool has_epilog; /* Only for TCS or PS. */
    bool is_monolithic;
+   struct ac_arg next_stage_pc;
    struct {
       bool tcs_in_out_eq;
       uint64_t tcs_temp_only_input_mask;
index a8ee61a..8daa209 100644 (file)
@@ -68,6 +68,7 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv
    aco_info->hw_stage = radv_select_hw_stage(radv, gfx_level);
    aco_info->tcs.epilog_pc = radv_args->tcs_epilog_pc;
    aco_info->tcs.tcs_offchip_layout = radv_args->tcs_offchip_layout;
+   aco_info->next_stage_pc = radv_args->next_stage_pc;
 }
 
 #define ASSIGN_VS_STATE_FIELD(x)    aco_info->state.x = radv->state->x