aco: implement create_tcs_jump_to_epilog()
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 17 Aug 2023 10:17:17 +0000 (12:17 +0200)
committer Marge Bot <emma+marge@anholt.net>
Tue, 22 Aug 2023 06:10:32 +0000 (06:10 +0000)
This implements jumping from the main TCS to the epilog.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24643>

src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_shader_info.h
src/amd/vulkan/radv_aco_shader_info.h

index 57b0e48..25e3b56 100644 (file)
@@ -10876,10 +10876,116 @@ get_arg_for_end(isel_context* ctx, struct ac_arg arg)
    return Operand(get_arg(ctx, arg), get_arg_reg(ctx->args, arg));
 }
 
+static Temp /* Emits the instructions that compute the current patch's output data offset and returns the v1 result. */
+get_tcs_out_current_patch_data_offset(isel_context* ctx)
+{
+   Builder bld(ctx->program, ctx->block);
+
+   const unsigned output_vertex_size = ctx->program->info.tcs.num_linked_outputs * 4u; /* NOTE(review): get_patch_base() scales by 16u, not 4u -- confirm the unit intended here */
+   const unsigned pervertex_output_patch_size =
+      ctx->program->info.tcs.tcs_vertices_out * output_vertex_size;
+   const unsigned output_patch_stride =
+      pervertex_output_patch_size + ctx->program->info.tcs.num_linked_patch_outputs * 4u; /* per-vertex outputs plus per-patch outputs of one patch */
+
+   Temp tcs_rel_ids = get_arg(ctx, ctx->args->tcs_rel_ids);
+   Temp rel_patch_id =
+      bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), tcs_rel_ids, Operand::c32(0u), Operand::c32(8u)); /* bit-field extract: offset 0, width 8 of tcs_rel_ids */
+   Temp patch_offset = bld.v_mul_imm(bld.def(v1), rel_patch_id, output_patch_stride, false); /* rel_patch_id * output_patch_stride */
+
+   Temp tcs_offchip_layout = get_arg(ctx, ctx->program->info.tcs.tcs_offchip_layout);
+
+   Temp patch_control_points = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc),
+                                        tcs_offchip_layout, Operand::c32(0x3f)); /* low 6 bits of tcs_offchip_layout */
+
+   Temp num_patches = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
+                               tcs_offchip_layout, Operand::c32(0x60006)); /* s_bfe_u32 control word: width 6 at bit offset 6 */
+
+   Temp lshs_vertex_stride = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
+                                      tcs_offchip_layout, Operand::c32(0x8000c)); /* s_bfe_u32 control word: width 8 at bit offset 12 */
+
+   Temp input_patch_size =
+      bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), patch_control_points, lshs_vertex_stride); /* control points * vertex stride */
+
+   Temp output_patch0_offset =
+      bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), num_patches, input_patch_size); /* presumably the outputs start after the inputs of all patches -- TODO confirm */
+
+   Temp output_patch_offset =
+      bld.nuw().sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
+                     Operand::c32(pervertex_output_patch_size), output_patch0_offset); /* adds pervertex_output_patch_size to the first patch's output offset */
+
+   return bld.nuw().vadd32(bld.def(v1), patch_offset, output_patch_offset); /* per-patch offset + uniform base, both adds marked nuw */
+}
+
+static Temp /* Emits num_patches * pervertex_output_patch_size as an s1 value; create_tcs_jump_to_epilog() passes it on as patch_base. */
+get_patch_base(isel_context* ctx)
+{
+   Builder bld(ctx->program, ctx->block);
+
+   const unsigned output_vertex_size = ctx->program->info.tcs.num_linked_outputs * 16u; /* NOTE(review): presumably 16 bytes per linked output -- confirm */
+   const unsigned pervertex_output_patch_size =
+      ctx->program->info.tcs.tcs_vertices_out * output_vertex_size; /* all per-vertex outputs of one patch */
+
+   Temp num_patches =
+      bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
+               get_arg(ctx, ctx->program->info.tcs.tcs_offchip_layout), Operand::c32(0x60006)); /* s_bfe_u32 control word: width 6 at bit offset 6 */
+
+   return bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), num_patches,
+                   Operand::c32(pervertex_output_patch_size)); /* compile-time patch size times run-time patch count */
+}
+
 static void
 create_tcs_jump_to_epilog(isel_context* ctx)
 {
-   /* TODO */
+   Builder bld(ctx->program, ctx->block); /* Everything below builds one p_jump_to_epilog pseudo instruction whose arguments are pinned to fixed registers. */
+
+   PhysReg vgpr_start(256); /* VGPR 0 */
+   PhysReg sgpr_start(0);   /* SGPR 0 */
+
+   /* SGPRs */
+   Operand ring_offsets = Operand(get_arg(ctx, ctx->args->ring_offsets));
+   ring_offsets.setFixed(sgpr_start);
+
+   Operand tess_offchip_offset = Operand(get_arg(ctx, ctx->args->tess_offchip_offset));
+   tess_offchip_offset.setFixed(sgpr_start.advance(8u)); /* NOTE(review): advance() offsets look like bytes (4 per register) -- confirm against PhysReg::advance */
+
+   Operand tcs_factor_offset = Operand(get_arg(ctx, ctx->args->tcs_factor_offset));
+   tcs_factor_offset.setFixed(sgpr_start.advance(12u));
+
+   Operand tcs_offchip_layout = Operand(get_arg(ctx, ctx->program->info.tcs.tcs_offchip_layout));
+   tcs_offchip_layout.setFixed(sgpr_start.advance(16u));
+
+   Operand patch_base = Operand(get_patch_base(ctx)); /* value computed by instructions emitted here, not a shader argument */
+   patch_base.setFixed(sgpr_start.advance(20u));
+
+   /* VGPRs */
+   Operand tcs_out_current_patch_data_offset = Operand(get_tcs_out_current_patch_data_offset(ctx));
+   tcs_out_current_patch_data_offset.setFixed(vgpr_start);
+
+   Operand invocation_id =
+      bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->tcs_rel_ids),
+               Operand::c32(8u), Operand::c32(5u)); /* bit-field extract: offset 8, width 5 of tcs_rel_ids */
+   invocation_id.setFixed(vgpr_start.advance(4u));
+
+   Operand rel_patch_id =
+      bld.pseudo(aco_opcode::p_extract, bld.def(v1), get_arg(ctx, ctx->args->tcs_rel_ids),
+                 Operand::c32(0u), Operand::c32(8u), Operand::c32(0u)); /* p_extract operands: index 0, 8 bits; the final 0 is presumably "no sign-extend" -- confirm */
+   rel_patch_id.setFixed(vgpr_start.advance(8u));
+
+   Temp continue_pc =
+      convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.tcs.epilog_pc)); /* epilog address argument, widened by the helper */
+
+   aco_ptr<Pseudo_instruction> jump{
+      create_instruction<Pseudo_instruction>(aco_opcode::p_jump_to_epilog, Format::PSEUDO, 9, 0)}; /* 9 operands, filled in below; the trailing 0 is presumably the definition count */
+   jump->operands[0] = Operand(continue_pc); /* jump target first, then the SGPR and VGPR arguments in the order built above */
+   jump->operands[1] = ring_offsets;
+   jump->operands[2] = tess_offchip_offset;
+   jump->operands[3] = tcs_factor_offset;
+   jump->operands[4] = tcs_offchip_layout;
+   jump->operands[5] = patch_base;
+   jump->operands[6] = tcs_out_current_patch_data_offset;
+   jump->operands[7] = invocation_id;
+   jump->operands[8] = rel_patch_id;
+   ctx->block->instructions.emplace_back(std::move(jump)); /* appended to the block the Builder was created on */
 }
 
 static void
@@ -10901,8 +11007,8 @@ create_tcs_end_for_epilog(isel_context* ctx)
    unsigned vgpr = 256 + ctx->args->num_vgprs_used;
 
    Temp rel_patch_id =
-      bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->tcs_rel_ids),
-               Operand::c32(0u), Operand::c32(8u));
+      bld.pseudo(aco_opcode::p_extract, bld.def(v1), get_arg(ctx, ctx->args->tcs_rel_ids),
+                 Operand::c32(0u), Operand::c32(8u), Operand::c32(0u));
    regs.emplace_back(Operand(rel_patch_id, PhysReg{vgpr++}));
 
    Temp invocation_id =
index d9d7c17..241af75 100644 (file)
@@ -108,13 +108,18 @@ struct aco_shader_info {
       bool has_prolog;
    } vs;
    struct {
+      struct ac_arg tcs_offchip_layout;
+
       /* Vulkan only */
       uint32_t num_lds_blocks;
+      struct ac_arg epilog_pc;
+      uint32_t num_linked_outputs;
+      uint32_t num_linked_patch_outputs;
+      uint32_t tcs_vertices_out;
 
       /* OpenGL only */
       bool pass_tessfactors_by_reg;
       unsigned patch_stride;
-      struct ac_arg tcs_offchip_layout;
       struct ac_arg tes_offchip_addr;
       struct ac_arg vs_state_bits;
    } tcs;
index ed01e4d..468e8d3 100644 (file)
@@ -53,6 +53,9 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv
    ASSIGN_FIELD(vs.tcs_temp_only_input_mask);
    ASSIGN_FIELD(vs.has_prolog);
    ASSIGN_FIELD(tcs.num_lds_blocks);
+   ASSIGN_FIELD(tcs.num_linked_outputs);
+   ASSIGN_FIELD(tcs.num_linked_patch_outputs);
+   ASSIGN_FIELD(tcs.tcs_vertices_out);
    ASSIGN_FIELD(ps.num_interp);
    ASSIGN_FIELD(ps.spi_ps_input);
    ASSIGN_FIELD(cs.subgroup_size);
@@ -62,6 +65,8 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv
    aco_info->image_2d_view_of_3d = radv_key->image_2d_view_of_3d;
    aco_info->ps.epilog_pc = radv_args->ps_epilog_pc;
    aco_info->hw_stage = radv_select_hw_stage(radv, gfx_level);
+   aco_info->tcs.epilog_pc = radv_args->tcs_epilog_pc;
+   aco_info->tcs.tcs_offchip_layout = radv_args->tcs_offchip_layout;
 }
 
 #define ASSIGN_VS_STATE_FIELD(x)    aco_info->state.x = radv->state->x
@@ -89,8 +94,16 @@ static inline void
 radv_aco_convert_tcs_epilog_key(struct aco_tcs_epilog_info *aco_info, const struct radv_tcs_epilog_key *radv,
                                 const struct radv_shader_args *radv_args)
 {
+   aco_info->pass_tessfactors_by_reg = false;
+   ASSIGN_FIELD(tcs_out_patch_fits_subgroup);
    ASSIGN_FIELD(primitive_mode);
    ASSIGN_FIELD(tes_reads_tessfactors);
+
+   aco_info->tcs_offchip_layout = radv_args->tcs_offchip_layout;
+   aco_info->invocation_id = radv_args->invocation_id;
+   aco_info->rel_patch_id = radv_args->rel_patch_id;
+   aco_info->tcs_out_current_patch_data_offset = radv_args->tcs_out_current_patch_data_offset;
+   aco_info->patch_base = radv_args->patch_base;
 }
 
 static inline void