From 5632d8d1a777d39c7882dcb011aab4619bcff01a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 7 Jun 2023 23:48:13 -0400 Subject: [PATCH] radeonsi: replace tcs_out_lds_layout with nearly identical tes_offchip_addr tcs_out_lds_layout is basically renamed to tes_offchip_addr in TCS, using the same variable as TES and also using the same bit layout. The only difference in the bit layout was that TCS had to mask out the low bits, which this also removes. The enums are renamed to *_SGPR_TCS_OFFCHIP_ADDR so as not to conflict with *_SGPR_TES_OFFCHIP_ADDR, which are in different user data SGPRs. Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_nir_lower_abi.c | 10 +------ src/gallium/drivers/radeonsi/si_pipe.h | 1 - src/gallium/drivers/radeonsi/si_shader.c | 6 ++--- src/gallium/drivers/radeonsi/si_shader.h | 4 +-- src/gallium/drivers/radeonsi/si_shader_internal.h | 6 +---- src/gallium/drivers/radeonsi/si_shader_llvm_tess.c | 31 +++++++++------------- src/gallium/drivers/radeonsi/si_state_draw.cpp | 5 ++-- 7 files changed, 21 insertions(+), 42 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 3beb574..73d654a 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -162,15 +162,7 @@ fetch_framebuffer(nir_builder *b, struct si_shader_args *args, static nir_ssa_def *build_tess_ring_desc(nir_builder *b, struct si_screen *screen, struct si_shader_args *args) { - nir_ssa_def *addr; - if (b->shader->info.stage == MESA_SHADER_TESS_CTRL) { - addr = ac_nir_load_arg(b, &args->ac, args->tcs_out_lds_layout); - /* TCS only receives high 13 bits of the address. */ - addr = nir_iand_imm(b, addr, 0xfff80000); - } else { - assert(b->shader->info.stage == MESA_SHADER_TESS_EVAL); - addr = ac_nir_load_arg(b, &args->ac, args->tes_offchip_addr); - } + nir_ssa_def *addr = ac_nir_load_arg(b, &args->ac, args->tes_offchip_addr); uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 34902f9..20860ec 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1183,7 +1183,6 @@ struct si_context { unsigned last_tes_sh_base; bool last_tess_uses_primid; unsigned num_patches_per_workgroup; - unsigned tcs_out_layout; unsigned tcs_out_offsets; unsigned tcs_offchip_layout; unsigned tes_offchip_ring_va_sgpr; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 38d29da..2b7b00d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -413,7 +413,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) declare_per_stage_desc_pointers(args, shader, true); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_out_lds_offsets); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_out_lds_layout); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tes_offchip_addr); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset); @@ -464,7 +464,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_offchip_layout); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_out_lds_offsets); - ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tcs_out_lds_layout); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->tes_offchip_addr); if (stage == MESA_SHADER_VERTEX) declare_vb_descriptor_input_sgprs(args, shader); @@ -505,7 +505,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) * param_tcs_offchip_layout, and internal_bindings * should be passed to the epilog. */ - for (i = 0; i <= 8 + GFX9_SGPR_TCS_OUT_LAYOUT; i++) + for (i = 0; i <= 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR; i++) ac_add_return(&args->ac, AC_ARG_SGPR); for (i = 0; i < 11; i++) ac_add_return(&args->ac, AC_ARG_VGPR); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 4c5c143..31a6593 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -170,14 +170,14 @@ enum /* GFX6-8: TCS only */ GFX6_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS, GFX6_SGPR_TCS_OUT_OFFSETS, - GFX6_SGPR_TCS_OUT_LAYOUT, + GFX6_SGPR_TCS_OFFCHIP_ADDR, GFX6_SGPR_TCS_IN_LAYOUT, GFX6_TCS_NUM_USER_SGPR, /* GFX9: Merged LS-HS (VS-TCS) only. */ GFX9_SGPR_TCS_OFFCHIP_LAYOUT = SI_VS_NUM_USER_SGPR, GFX9_SGPR_TCS_OUT_OFFSETS, - GFX9_SGPR_TCS_OUT_LAYOUT, + GFX9_SGPR_TCS_OFFCHIP_ADDR, GFX9_TCS_NUM_USER_SGPR, /* GS limits */ diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 4ed2256..8e8166d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -60,12 +60,8 @@ struct si_shader_args { * = 1M, clamped to 32K(LDS limit) / 4 = 8K */ struct ac_arg tcs_out_lds_offsets; - /* Layout of TCS outputs / TES inputs: - * [19:31] = high 13 bits of the 32-bit address of tessellation ring buffers - */ - struct ac_arg tcs_out_lds_layout; - /* API TES */ + /* API TCS & TES */ struct ac_arg tes_offchip_addr; /* PS */ struct ac_arg pos_fixed_pt; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 5be9647..626dabd 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -169,23 +169,16 @@ static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx, LLVMTypeRef typ enum si_tess_ring { - TCS_FACTOR_RING, - TESS_OFFCHIP_RING_TCS, - TESS_OFFCHIP_RING_TES, + TESS_FACTOR_RING, + TESS_OFFCHIP_RING, }; static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum si_tess_ring ring) { LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef addr = ac_get_arg( - &ctx->ac, ring == TESS_OFFCHIP_RING_TES ? ctx->args->tes_offchip_addr : ctx->args->tcs_out_lds_layout); + LLVMValueRef addr = ac_get_arg(&ctx->ac, ctx->args->tes_offchip_addr); - /* TCS only receives high 13 bits of the address. */ - if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) { - addr = LLVMBuildAnd(builder, addr, LLVMConstInt(ctx->ac.i32, 0xfff80000, 0), ""); - } - - if (ring == TCS_FACTOR_RING) { + if (ring == TESS_FACTOR_RING) { unsigned tf_offset = ctx->screen->hs.tess_offchip_ring_size; addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), ""); } @@ -335,7 +328,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4); /* Get the buffer. */ - buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING); + buffer = get_tess_ring_descriptor(ctx, TESS_FACTOR_RING); /* Get the offset. */ tf_base = ac_get_arg(&ctx->ac, ctx->args->ac.tcs_factor_offset); @@ -372,7 +365,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader LLVMValueRef tf_inner_offset; unsigned param_outer, param_inner; - buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); + buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING); base = ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset); param_outer = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER); @@ -433,14 +426,14 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx) if (ctx->screen->info.gfx_level >= GFX9) { ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); - ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT); + ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR); /* Tess offchip and tess factor offsets are at the beginning. */ ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, 2); ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, 4); - vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1; + vgpr = 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR + 1; } else { ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT); - ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_out_lds_layout, GFX6_SGPR_TCS_OUT_LAYOUT); + ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, GFX6_SGPR_TCS_OFFCHIP_ADDR); /* Tess offchip and tess factor offsets are after user SGPRs. */ ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tess_offchip_offset, GFX6_TCS_NUM_USER_SGPR); ret = si_insert_input_ret(ctx, ret, ctx->args->ac.tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1); @@ -513,7 +506,7 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_out_lds_offsets, 8 + GFX9_SGPR_TCS_OUT_OFFSETS); - ret = si_insert_input_ret(ctx, ret, ctx->args->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT); + ret = si_insert_input_ret(ctx, ret, ctx->args->tes_offchip_addr, 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR); unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR; ret = LLVMBuildInsertValue(ctx->ac.builder, ret, @@ -581,7 +574,7 @@ void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_par ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->tcs_offchip_layout); ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); - ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->tcs_out_lds_layout); + ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->tes_offchip_addr); } else { ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); @@ -589,7 +582,7 @@ void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_par ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->tcs_offchip_layout); ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); - ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->tcs_out_lds_layout); + ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->tes_offchip_addr); ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->ac.tess_offchip_offset); ac_add_arg(&ctx->args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args->ac.tcs_factor_offset); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index e215084..5f38851 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -797,7 +797,6 @@ void si_update_tess_io_layout_state(struct si_context *sctx) assert((ring_va & u_bit_consecutive(0, 19)) == 0); sctx->tes_offchip_ring_va_sgpr = ring_va; - sctx->tcs_out_layout = ring_va; sctx->tcs_out_offsets = ((perpatch_output_offset / 4) << 16); sctx->tcs_offchip_layout = (num_patches - 1) | ((num_tcs_output_cp - 1) << 6) | ((num_tcs_input_cp - 1) << 11) | @@ -863,7 +862,7 @@ static void si_emit_tess_io_layout_state(struct si_context *sctx) R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT * 4, 3); radeon_emit(sctx->tcs_offchip_layout); radeon_emit(sctx->tcs_out_offsets); - radeon_emit(sctx->tcs_out_layout); + radeon_emit(sctx->tes_offchip_ring_va_sgpr); } else { /* Due to a hw bug, RSRC2_LS must be written twice with another * LS register written in between. */ @@ -878,7 +877,7 @@ static void si_emit_tess_io_layout_state(struct si_context *sctx) R_00B430_SPI_SHADER_USER_DATA_HS_0 + GFX6_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4); radeon_emit(sctx->tcs_offchip_layout); radeon_emit(sctx->tcs_out_offsets); - radeon_emit(sctx->tcs_out_layout); + radeon_emit(sctx->tes_offchip_ring_va_sgpr); radeon_emit(sctx->current_vs_state); } -- 2.7.4