From 214ddfb6884879a73168b55c306680c7d49d19f5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 24 Jun 2019 17:39:39 -0400 Subject: [PATCH] radeonsi/gfx10: implement si_shader_vs Only used with tessellation + GS instancing. Acked-by: Bas Nieuwenhuizen --- src/gallium/drivers/radeonsi/si_shader.c | 10 ++++--- src/gallium/drivers/radeonsi/si_state.c | 1 + src/gallium/drivers/radeonsi/si_state_shaders.c | 39 +++++++++++++++---------- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 1dec250..9a4a416 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3656,7 +3656,8 @@ static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi, } } - if (ctx->shader->selector->so.num_outputs) + if (ctx->ac.chip_class <= GFX9 && + ctx->shader->selector->so.num_outputs) si_llvm_emit_streamout(ctx, outputs, i, 0); /* Export PrimitiveID. */ @@ -4448,7 +4449,8 @@ static void declare_streamout_params(struct si_shader_context *ctx, struct pipe_stream_output_info *so, struct si_function_info *fninfo) { - int i; + if (ctx->ac.chip_class >= GFX10) + return; /* Streamout SGPRs. */ if (so->num_outputs) { @@ -4460,7 +4462,7 @@ static void declare_streamout_params(struct si_shader_context *ctx, ctx->param_streamout_write_index = add_arg(fninfo, ARG_SGPR, ctx->ac.i32); } /* A streamout buffer offset is loaded if the stride is non-zero. */ - for (i = 0; i < 4; i++) { + for (int i = 0; i < 4; i++) { if (!so->stride[i]) continue; @@ -5789,7 +5791,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, } /* Streamout and exports. */ - if (gs_selector->so.num_outputs) { + if (ctx.ac.chip_class <= GFX9 && gs_selector->so.num_outputs) { si_llvm_emit_streamout(&ctx, outputs, gsinfo->num_outputs, stream); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index c96f7af..90d332d 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5565,6 +5565,7 @@ static void si_init_config(struct si_context *sctx) */ si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512)); + si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, sscreen->info.pa_sc_tile_steering_override); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 5e1a166..cdcd87f 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -418,7 +418,8 @@ static void polaris_set_vgt_vertex_reuse(struct si_screen *sscreen, { unsigned type = sel->type; - if (sscreen->info.family < CHIP_POLARIS10) + if (sscreen->info.family < CHIP_POLARIS10 || + sscreen->info.chip_class >= GFX10) return; /* VS as VS, or VS as ES: */ @@ -1371,21 +1372,27 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8); si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(va >> 40)); - si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, - S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) | - S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8) | - S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) | - S_00B128_DX10_CLAMP(1) | - S_00B128_FLOAT_MODE(shader->config.float_mode)); - si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, - S_00B12C_USER_SGPR(num_user_sgprs) | - S_00B12C_OC_LDS_EN(oc_lds_en) | - S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) | - S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) | - S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) | - S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) | - S_00B12C_SO_EN(!!shader->selector->so.num_outputs) | - S_00B12C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); + + uint32_t rsrc1 = S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) | + S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) | + S_00B128_DX10_CLAMP(1) | + S_00B128_MEM_ORDERED(sscreen->info.chip_class >= GFX10) | + S_00B128_FLOAT_MODE(shader->config.float_mode); + uint32_t rsrc2 = S_00B12C_USER_SGPR(num_user_sgprs) | + S_00B12C_OC_LDS_EN(oc_lds_en) | + S_00B12C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); + + if (sscreen->info.chip_class <= GFX9) { + rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8); + rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) | + S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) | + S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) | + S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) | + S_00B12C_SO_EN(!!shader->selector->so.num_outputs); + } + + si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1); + si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, rsrc2); if (window_space) shader->ctx_reg.vs.pa_cl_vte_cntl = -- 2.7.4