void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
{
- struct si_shader_ctx_state *tcs_shader;
-
if (!log)
return;
- tcs_shader = &sctx->shader.tcs;
- if (sctx->shader.tes.cso && !sctx->shader.tcs.cso)
- tcs_shader = &sctx->fixed_func_tcs_shader;
-
si_dump_framebuffer(sctx, log);
si_dump_gfx_shader(sctx, &sctx->shader.vs, log);
- si_dump_gfx_shader(sctx, tcs_shader, log);
+ si_dump_gfx_shader(sctx, &sctx->shader.tcs, log);
si_dump_gfx_shader(sctx, &sctx->shader.tes, log);
si_dump_gfx_shader(sctx, &sctx->shader.gs, log);
si_dump_gfx_shader(sctx, &sctx->shader.ps, log);
4, sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots, si_identity,
log);
si_dump_gfx_descriptors(sctx, &sctx->shader.vs, log);
- si_dump_gfx_descriptors(sctx, tcs_shader, log);
+ si_dump_gfx_descriptors(sctx, &sctx->shader.tcs, log);
si_dump_gfx_descriptors(sctx, &sctx->shader.tes, log);
si_dump_gfx_descriptors(sctx, &sctx->shader.gs, log);
si_dump_gfx_descriptors(sctx, &sctx->shader.ps, log);
for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
si_pm4_free_state(sctx, sctx->vgt_shader_config[i], SI_STATE_IDX(vgt_shader_config));
- if (sctx->fixed_func_tcs_shader.cso)
- sctx->b.delete_tcs_state(&sctx->b, sctx->fixed_func_tcs_shader.cso);
+ if (sctx->fixed_func_tcs_shader_cache) {
+ hash_table_foreach(sctx->fixed_func_tcs_shader_cache, entry) {
+ sctx->b.delete_tcs_state(&sctx->b, entry->data);
+ }
+ _mesa_hash_table_destroy(sctx->fixed_func_tcs_shader_cache, NULL);
+ }
+
if (sctx->custom_dsa_flush)
sctx->b.delete_depth_stencil_alpha_state(&sctx->b, sctx->custom_dsa_flush);
if (sctx->custom_blend_resolve)
struct si_screen *screen;
struct util_debug_callback debug;
struct ac_llvm_compiler compiler; /* only non-threaded compilation */
- struct si_shader_ctx_state fixed_func_tcs_shader;
+ struct hash_table *fixed_func_tcs_shader_cache;
struct si_resource *wait_mem_scratch;
struct si_resource *wait_mem_scratch_tmz;
unsigned wait_mem_number;
struct si_shader_ctx_state shaders[SI_NUM_GRAPHICS_SHADERS];
};
struct si_cs_shader_state cs_shader_state;
+ /* True if the currently bound TCS was set by the user, false if it is the internal fixed-function TCS (or none). */
+ bool is_user_tcs;
/* shader information */
uint64_t ps_inputs_read_or_disabled;
/* si_shaderlib_tgsi.c */
void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type,
unsigned num_layers);
-void *si_create_fixed_func_tcs(struct si_context *sctx);
void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread,
bool dst_stream_cache_policy, bool is_copy);
void *si_create_clear_buffer_rmw_cs(struct si_context *sctx);
si_dump_shader_key_vs(key, &key->ge.part.tcs.ls_prolog, "part.tcs.ls_prolog", f);
}
fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->ge.part.tcs.epilog.prim_mode);
- fprintf(f, " mono.u.ff_tcs_inputs_to_copy = 0x%" PRIx64 "\n",
- key->ge.mono.u.ff_tcs_inputs_to_copy);
fprintf(f, " opt.prefer_mono = %u\n", key->ge.opt.prefer_mono);
fprintf(f, " opt.same_patch_vertices = %u\n", key->ge.opt.same_patch_vertices);
break;
key->tcs_epilog.wave32 = shader->wave_size == 32;
key->tcs_epilog.states = shader->key.ge.part.tcs.epilog;
- /* If output patches are wholly in one wave, we don't need a barrier.
- * The fixed-func TCS doesn't set tcs_vertices_out, but it won't use a barrier
- * anyway because tess levels are always defined in all invocations there.
- */
+ /* If output patches are wholly in one wave, we don't need a barrier. */
key->tcs_epilog.noop_s_barrier =
- shader->selector->info.base.tess.tcs_vertices_out &&
shader->wave_size % shader->selector->info.base.tess.tcs_vertices_out == 0;
}
union si_vs_fix_fetch vs_fix_fetch[SI_MAX_ATTRIBS];
union {
- uint64_t ff_tcs_inputs_to_copy; /* fixed-func TCS only */
/* When PS needs PrimID and GS is disabled. */
unsigned vs_export_prim_id : 1; /* VS and TES only */
unsigned gs_tri_strip_adj_fix : 1; /* GS only */
/* If both input and output patches are wholly in one wave, we don't need a barrier.
* That's true when both VS and TCS have the same number of patch vertices and
* the wave size is a multiple of the number of patch vertices.
- *
- * The fixed-func TCS doesn't set tcs_vertices_out.
*/
if (!shader->key.ge.opt.same_patch_vertices ||
- (sel->info.base.tess.tcs_vertices_out &&
- ctx->ac.wave_size % sel->info.base.tess.tcs_vertices_out != 0))
+ ctx->ac.wave_size % sel->info.base.tess.tcs_vertices_out != 0)
ac_build_s_barrier(&ctx->ac, ctx->stage);
}
} else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
{
assert(ctx->stage == MESA_SHADER_TESS_CTRL);
- if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
- return util_last_bit64(ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) * 4;
-
return util_last_bit64(ctx->shader->selector->info.outputs_written) * 4;
}
static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
{
- if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
- return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);
-
const struct si_shader_info *info = &ctx->shader->selector->info;
unsigned tcs_out_vertices = info->base.tess.tcs_vertices_out;
unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
ctx->shader->selector ? ctx->shader->selector->info.base.tess.tcs_vertices_out
: 0;
- /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
+ /* If !tcs_out_vertices, it's the TCS epilog. */
if (ctx->stage == MESA_SHADER_TESS_CTRL && tcs_out_vertices)
return LLVMConstInt(ctx->ac.i32, tcs_out_vertices, 0);
}
}
-/**
- * Forward all outputs from the vertex shader to the TES. This is only used
- * for the fixed function TCS.
- */
-static void si_copy_tcs_inputs(struct si_shader_context *ctx)
-{
- LLVMValueRef invocation_id, buffer, buffer_offset;
- LLVMValueRef lds_vertex_stride, lds_base;
- uint64_t inputs;
-
- invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
- buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
- buffer_offset = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
-
- lds_vertex_stride = si_get_tcs_in_vertex_dw_stride(ctx);
- lds_base = get_tcs_in_current_patch_offset(ctx);
- lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base);
-
- inputs = ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy;
- while (inputs) {
- unsigned i = u_bit_scan64(&inputs);
-
- LLVMValueRef lds_ptr =
- LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, 4 * i, 0), "");
-
- LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(
- ctx, get_rel_patch_id(ctx), invocation_id, LLVMConstInt(ctx->ac.i32, i, 0));
-
- LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
-
- ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, buffer_addr, buffer_offset,
- ac_glc);
- }
-}
-
static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader_part_key *key,
LLVMValueRef rel_patch_id, LLVMValueRef invocation_id,
LLVMValueRef tcs_out_current_patch_data_offset,
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
- si_copy_tcs_inputs(ctx);
-
rel_patch_id = get_rel_patch_id(ctx);
invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
return *vs;
}
-/**
- * This is used when TCS is NULL in the VS->TCS->TES chain. In this case,
- * VS passes its outputs to TES directly, so the fixed-function shader only
- * has to write TESSOUTER and TESSINNER.
- */
-void *si_create_fixed_func_tcs(struct si_context *sctx)
-{
- struct ureg_src outer, inner;
- struct ureg_dst tessouter, tessinner;
- struct ureg_program *ureg = ureg_create(PIPE_SHADER_TESS_CTRL);
-
- if (!ureg)
- return NULL;
-
- outer = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL, 0);
- inner = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL, 0);
-
- tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0);
- tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0);
-
- ureg_MOV(ureg, tessouter, outer);
- ureg_MOV(ureg, tessinner, inner);
- ureg_END(ureg);
-
- return ureg_create_shader_and_destroy(ureg, &sctx->b);
-}
-
/* Create a compute shader implementing clear_buffer or copy_buffer. */
void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread,
bool dst_stream_cache_policy, bool is_copy)
bool si_update_gs_ring_buffers(struct si_context *sctx);
bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes);
unsigned si_calc_inst_pref_size(struct si_shader *shader);
+bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx);
/* si_state_draw.cpp */
void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,
return false;
}
- if (sctx->shader.tcs.cso) {
- r = si_shader_select(ctx, &sctx->shader.tcs);
- if (r)
+ if (!sctx->is_user_tcs) {
+ if (!si_set_tcs_to_fixed_func_shader(sctx))
return false;
- si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current);
- } else {
- if (!sctx->fixed_func_tcs_shader.cso) {
- sctx->fixed_func_tcs_shader.cso =
- (struct si_shader_selector*)si_create_fixed_func_tcs(sctx);
- if (!sctx->fixed_func_tcs_shader.cso)
- return false;
-
- sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
- sctx->fixed_func_tcs_shader.cso->info.tessfactors_are_def_in_all_invocs;
- }
-
- r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader);
- if (r)
- return false;
- si_pm4_bind_state(sctx, hs, sctx->fixed_func_tcs_shader.current);
}
+ r = si_shader_select(ctx, &sctx->shader.tcs);
+ if (r)
+ return false;
+ si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current);
+
if (!HAS_GS || GFX_VERSION <= GFX8) {
r = si_shader_select(ctx, &sctx->shader.tes);
if (r)
}
}
} else {
+ /* Reset TCS to clear fixed function shader. */
+ if (!sctx->is_user_tcs && sctx->shader.tcs.cso) {
+ sctx->shader.tcs.cso = NULL;
+ sctx->shader.tcs.current = NULL;
+ }
+
if (GFX_VERSION <= GFX8) {
si_pm4_bind_state(sctx, ls, NULL);
sctx->prefetch_L2_mask &= ~SI_PREFETCH_LS;
{
struct si_shader *ls_current;
struct si_shader_selector *ls;
- /* The TES pointer will only be used for sctx->last_tcs.
- * It would be wrong to think that TCS = TES. */
- struct si_shader_selector *tcs =
- sctx->shader.tcs.cso ? sctx->shader.tcs.cso : sctx->shader.tes.cso;
+ struct si_shader_selector *tcs = sctx->shader.tcs.cso;
unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id;
bool has_primid_instancing_bug = sctx->gfx_level == GFX6 && sctx->screen->info.max_se == 1;
unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL];
/* Since GFX9 has merged LS-HS in the TCS state, set LS = TCS. */
if (sctx->gfx_level >= GFX9) {
- if (sctx->shader.tcs.cso)
- ls_current = sctx->shader.tcs.current;
- else
- ls_current = sctx->fixed_func_tcs_shader.current;
-
+ ls_current = sctx->shader.tcs.current;
ls = ls_current->key.ge.part.tcs.ls;
} else {
ls_current = sctx->shader.vs.current;
/* This calculates how shader inputs and outputs among VS, TCS, and TES
* are laid out in LDS. */
- unsigned num_tcs_inputs = util_last_bit64(ls->info.outputs_written);
- unsigned num_tcs_output_cp, num_tcs_outputs, num_tcs_patch_outputs;
-
- if (sctx->shader.tcs.cso) {
- num_tcs_outputs = util_last_bit64(tcs->info.outputs_written);
- num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out;
- num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);
- } else {
- /* No TCS. Route varyings from LS to TES. */
- num_tcs_outputs = num_tcs_inputs;
- num_tcs_output_cp = num_tcs_input_cp;
- num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
- }
+ unsigned num_tcs_outputs = util_last_bit64(tcs->info.outputs_written);
+ unsigned num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out;
+ unsigned num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);
unsigned input_vertex_size = ls->info.lshs_vertex_stride;
unsigned output_vertex_size = num_tcs_outputs * 16;
si_need_gfx_cs_space(sctx, num_draws);
if (HAS_TESS) {
- struct si_shader_selector *tcs = sctx->shader.tcs.cso;
+ if (sctx->is_user_tcs) {
+ struct si_shader_selector *tcs = sctx->shader.tcs.cso;
- /* The rarely occuring tcs == NULL case is not optimized. */
- bool same_patch_vertices =
- GFX_VERSION >= GFX9 &&
- tcs && sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out;
+ bool same_patch_vertices =
+ GFX_VERSION >= GFX9 &&
+ sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out;
- if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) {
- sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices;
- sctx->do_update_shaders = true;
- }
+ if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) {
+ sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices;
+ sctx->do_update_shaders = true;
+ }
- if (GFX_VERSION == GFX9 && sctx->screen->info.has_ls_vgpr_init_bug) {
- /* Determine whether the LS VGPR fix should be applied.
- *
- * It is only required when num input CPs > num output CPs,
- * which cannot happen with the fixed function TCS. We should
- * also update this bit when switching from TCS to fixed
- * function TCS.
+ if (GFX_VERSION == GFX9 && sctx->screen->info.has_ls_vgpr_init_bug) {
+ /* Determine whether the LS VGPR fix should be applied.
+ *
+ * It is only required when num input CPs > num output CPs,
+ * which cannot happen with the fixed function TCS.
+ */
+ bool ls_vgpr_fix =
+ sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out;
+
+ if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) {
+ sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
+ sctx->do_update_shaders = true;
+ }
+ }
+ } else {
+ /* These fields are static for the fixed-function TCS, so there is no need
+ * to set do_update_shaders between fixed-function TCS draws. When switching
+ * from the fixed-function TCS to a user TCS or vice versa, do_update_shaders
+ * should already have been set by the bind-state call.
+ */
- bool ls_vgpr_fix =
- tcs && sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out;
+ sctx->shader.tcs.key.ge.opt.same_patch_vertices = GFX_VERSION >= GFX9;
+ sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = false;
- if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) {
- sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
- sctx->fixed_func_tcs_shader.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
+ /* User may only change patch vertices, needs to update fixed func TCS. */
+ if (sctx->shader.tcs.cso &&
+ sctx->shader.tcs.cso->info.base.tess.tcs_vertices_out != sctx->patch_vertices)
sctx->do_update_shaders = true;
- }
}
}
sctx->shader.vs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL;
sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0;
sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false;
- sctx->fixed_func_tcs_shader.key.ge.mono.u.ff_tcs_inputs_to_copy = sel ? sel->info.outputs_written : 0;
if (si_update_ngg(sctx))
si_shader_change_notify(sctx);
struct si_shader_selector *sel = (struct si_shader_selector*)state;
bool enable_changed = !!sctx->shader.tcs.cso != !!sel;
+ /* Note that the user's shader selector may be the same as the currently
+ * bound fixed-function shader, so this field must be updated even when
+ * sctx->shader.tcs.cso == sel.
+ */
+ sctx->is_user_tcs = !!sel;
+
if (sctx->shader.tcs.cso == sel)
return;
si_update_tess_uses_prim_id(sctx);
sctx->shader.tcs.key.ge.part.tcs.epilog.prim_mode =
- sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.prim_mode =
sel ? sel->info.base.tess._primitive_mode : 0;
sctx->shader.tcs.key.ge.part.tcs.epilog.tes_reads_tess_factors =
- sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.tes_reads_tess_factors =
sel ? sel->info.reads_tess_factors : 0;
si_update_common_shader_state(sctx, sel, PIPE_SHADER_TESS_EVAL);
return 1;
}
-static struct si_shader *si_get_tcs_current(struct si_context *sctx)
-{
- if (!sctx->shader.tes.cso)
- return NULL; /* tessellation disabled */
-
- return sctx->shader.tcs.cso ? sctx->shader.tcs.current : sctx->fixed_func_tcs_shader.current;
-}
-
static bool si_update_scratch_relocs(struct si_context *sctx)
{
- struct si_shader *tcs = si_get_tcs_current(sctx);
int r;
/* Update the shaders, so that they are using the latest scratch.
if (r == 1)
si_pm4_bind_state(sctx, gs, sctx->shader.gs.current);
- r = si_update_scratch_buffer(sctx, tcs);
+ r = si_update_scratch_buffer(sctx, sctx->shader.tcs.current);
if (r < 0)
return false;
if (r == 1)
- si_pm4_bind_state(sctx, hs, tcs);
+ si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current);
/* VS can be bound as LS, ES, or VS. */
r = si_update_scratch_buffer(sctx, sctx->shader.vs.current);
}
}
+struct si_fixed_func_tcs_shader_key { /* Lookup key for sctx->fixed_func_tcs_shader_cache; hashed and compared as raw bytes over sizeof(struct), so any padding bytes take part too. */
+ uint64_t outputs_written; /* Filled from the bound VS selector's info.outputs_written. */
+ uint8_t vertices_out; /* Filled from sctx->patch_vertices (narrowed to 8 bits). */
+};
+
+/* Hash callback for the fixed-function TCS cache: hashes the raw bytes of a
+ * struct si_fixed_func_tcs_shader_key.
+ */
+static uint32_t si_fixed_func_tcs_shader_key_hash(const void *key)
+{
+   const struct si_fixed_func_tcs_shader_key *tcs_key =
+      (const struct si_fixed_func_tcs_shader_key *)key;
+
+   return _mesa_hash_data(tcs_key, sizeof(*tcs_key));
+}
+
+/* Equality callback for the fixed-function TCS cache: two keys match when
+ * their raw bytes are identical.
+ */
+static bool si_fixed_func_tcs_shader_key_equals(const void *a, const void *b)
+{
+   const size_t key_size = sizeof(struct si_fixed_func_tcs_shader_key);
+
+   return !memcmp(a, b, key_size);
+}
+
+/**
+ * Make the internal fixed-function (passthrough) TCS the current TCS.
+ *
+ * A selector is looked up in sctx->fixed_func_tcs_shader_cache, keyed by the
+ * bound VS's outputs_written mask and the current patch_vertices. On a miss
+ * a new one is created with si_create_passthrough_tcs() and cached. The
+ * cache itself is created lazily on first use.
+ *
+ * \param sctx  context whose shader.tcs state is updated; a VS must be bound.
+ * \return true on success; false if the cache, the stored key, or the shader
+ *         could not be created, in which case sctx->shader.tcs is untouched.
+ */
+bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx)
+{
+   if (!sctx->fixed_func_tcs_shader_cache) {
+      sctx->fixed_func_tcs_shader_cache = _mesa_hash_table_create(
+         NULL, si_fixed_func_tcs_shader_key_hash,
+         si_fixed_func_tcs_shader_key_equals);
+      if (!sctx->fixed_func_tcs_shader_cache)
+         return false;
+   }
+
+   /* The key is hashed and compared as raw bytes, so clear the whole struct
+    * first to give any padding bytes a defined value.
+    */
+   struct si_fixed_func_tcs_shader_key key;
+   memset(&key, 0, sizeof(key));
+   key.outputs_written = sctx->shader.vs.cso->info.outputs_written;
+   key.vertices_out = sctx->patch_vertices;
+
+   struct hash_entry *entry = _mesa_hash_table_search(
+      sctx->fixed_func_tcs_shader_cache, &key);
+
+   struct si_shader_selector *tcs;
+   if (entry) {
+      tcs = (struct si_shader_selector *)entry->data;
+   } else {
+      /* The hash table keeps the key pointer, not a copy, so the stored key
+       * must outlive this call. Allocate it as a ralloc child of the table
+       * so that _mesa_hash_table_destroy() releases it with the table.
+       */
+      struct si_fixed_func_tcs_shader_key *stored_key =
+         (struct si_fixed_func_tcs_shader_key *)ralloc_size(
+            sctx->fixed_func_tcs_shader_cache, sizeof(key));
+      if (!stored_key)
+         return false;
+      memcpy(stored_key, &key, sizeof(key));
+
+      tcs = (struct si_shader_selector *)si_create_passthrough_tcs(sctx);
+      if (!tcs) {
+         ralloc_free(stored_key);
+         return false;
+      }
+
+      /* If the insert fails the shader is not owned by the cache, so delete
+       * it here instead of leaking it.
+       */
+      if (!_mesa_hash_table_insert(sctx->fixed_func_tcs_shader_cache, stored_key,
+                                   (void *)tcs)) {
+         sctx->b.delete_tcs_state(&sctx->b, tcs);
+         ralloc_free(stored_key);
+         return false;
+      }
+   }
+
+   sctx->shader.tcs.cso = tcs;
+   sctx->shader.tcs.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
+      tcs->info.tessfactors_are_def_in_all_invocs;
+
+   return true;
+}
+
void si_init_screen_live_shader_cache(struct si_screen *sscreen)
{
util_live_shader_cache_init(&sscreen->live_shader_cache, si_create_shader_selector,