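The GS prolog only exists because of the gfx6-9 hw bug with triangle strips with adjacency, and it is used very rarely.
Let's simplify it: remove the separate GS prolog shader part and apply the workaround directly in the main GS part, selected by a new monolithic shader key bit (mono.u.gs_tri_strip_adj_fix).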
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13393>
static void si_destroy_screen(struct pipe_screen *pscreen)
{
struct si_screen *sscreen = (struct si_screen *)pscreen;
- struct si_shader_part *parts[] = {sscreen->vs_prologs, sscreen->tcs_epilogs, sscreen->gs_prologs,
+ struct si_shader_part *parts[] = {sscreen->vs_prologs, sscreen->tcs_epilogs,
sscreen->ps_prologs, sscreen->ps_epilogs};
unsigned i;
simple_mtx_t shader_parts_mutex;
struct si_shader_part *vs_prologs;
struct si_shader_part *tcs_epilogs;
- struct si_shader_part *gs_prologs;
struct si_shader_part *ps_prologs;
struct si_shader_part *ps_epilogs;
key->ge.part.gs.es->info.stage == MESA_SHADER_VERTEX) {
si_dump_shader_key_vs(key, &key->ge.part.gs.vs_prolog, "part.gs.vs_prolog", f);
}
- fprintf(f, " part.gs.prolog.tri_strip_adj_fix = %u\n",
- key->ge.part.gs.prolog.tri_strip_adj_fix);
+ fprintf(f, " mono.u.gs_tri_strip_adj_fix = %u\n", key->ge.mono.u.gs_tri_strip_adj_fix);
fprintf(f, " as_ngg = %u\n", key->ge.as_ngg);
break;
assert(!prolog);
shader.key.ge.part.tcs.epilog = key->tcs_epilog.states;
break;
- case MESA_SHADER_GEOMETRY:
- assert(prolog);
- shader.key.ge.as_ngg = key->gs_prolog.as_ngg;
- break;
case MESA_SHADER_FRAGMENT:
if (prolog)
shader.key.ps.part.prolog = key->ps_prolog.states;
shader->previous_stage = es_main_part;
}
- if (!shader->key.ge.part.gs.prolog.tri_strip_adj_fix)
- return true;
-
- union si_shader_part_key prolog_key;
- memset(&prolog_key, 0, sizeof(prolog_key));
- prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog;
- prolog_key.gs_prolog.as_ngg = shader->key.ge.as_ngg;
-
- shader->prolog2 =
- si_get_shader_part(sscreen, &sscreen->gs_prologs, MESA_SHADER_GEOMETRY, true, &prolog_key,
- compiler, debug, si_llvm_build_gs_prolog, "Geometry Shader Prolog");
- return shader->prolog2 != NULL;
+ return true;
}
/**
unsigned tes_reads_tess_factors : 1;
};
-struct si_gs_prolog_bits {
- unsigned tri_strip_adj_fix : 1;
-};
-
/* Common PS bits between the shader key and the prolog key. */
struct si_ps_prolog_bits {
unsigned color_two_side : 1;
struct si_tcs_epilog_bits states;
} tcs_epilog;
struct {
- struct si_gs_prolog_bits states;
- unsigned as_ngg : 1;
- } gs_prolog;
- struct {
struct si_ps_prolog_bits states;
unsigned num_input_sgprs : 6;
unsigned num_input_vgprs : 5;
struct {
struct si_vs_prolog_bits vs_prolog; /* for merged ES-GS */
struct si_shader_selector *es; /* for merged ES-GS */
- struct si_gs_prolog_bits prolog;
} gs;
} part;
union si_vs_fix_fetch vs_fix_fetch[SI_MAX_ATTRIBS];
union {
- uint64_t ff_tcs_inputs_to_copy; /* for fixed-func TCS */
+ uint64_t ff_tcs_inputs_to_copy; /* fixed-func TCS only */
/* When PS needs PrimID and GS is disabled. */
- unsigned vs_export_prim_id : 1;
+ unsigned vs_export_prim_id : 1; /* VS and TES only */
+ unsigned gs_tri_strip_adj_fix : 1; /* GS only */
} u;
} mono;
struct ac_llvm_compiler *compiler;
+ /* GS vertex offsets unpacked with the gfx6-9 tristrip_adj bug workaround. */
+ LLVMValueRef gs_vtx_offset[6];
+
/* Preloaded descriptors. */
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring[4];
void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi);
void si_preload_esgs_ring(struct si_shader_context *ctx);
void si_preload_gs_rings(struct si_shader_context *ctx);
-void si_llvm_build_gs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key);
void si_llvm_init_gs_callbacks(struct si_shader_context *ctx);
/* si_shader_llvm_tess.c */
static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
{
- if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+ /* Unpack GS vertex offsets. */
+ for (unsigned i = 0; i < 6; i++) {
+ if (ctx->screen->info.chip_class >= GFX9) {
+ ctx->gs_vtx_offset[i] = si_unpack_param(ctx, ctx->args.gs_vtx_offset[i / 2], (i & 1) * 16, 16);
+ } else {
+ ctx->gs_vtx_offset[i] = ac_get_arg(&ctx->ac, ctx->args.gs_vtx_offset[i]);
+ }
+ }
+
+ /* Apply the hw bug workaround for triangle strips with adjacency. */
+ if (ctx->screen->info.chip_class <= GFX9 &&
+ ctx->shader->key.ge.mono.u.gs_tri_strip_adj_fix) {
+ LLVMValueRef prim_id = ac_get_arg(&ctx->ac, ctx->args.gs_prim_id);
+ /* Remap GS vertex offsets for every other primitive. */
+ LLVMValueRef rotate = LLVMBuildTrunc(ctx->ac.builder, prim_id, ctx->ac.i1, "");
+ LLVMValueRef fixed[6];
+
+ for (unsigned i = 0; i < 6; i++) {
+ fixed[i] = LLVMBuildSelect(ctx->ac.builder, rotate,
+ ctx->gs_vtx_offset[(i + 4) % 6],
+ ctx->gs_vtx_offset[i], "");
+ }
+ memcpy(ctx->gs_vtx_offset, fixed, sizeof(fixed));
+ }
+ } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
unsigned colors_read = ctx->shader->selector->info.colors_read;
LLVMValueRef main_fn = ctx->main_fn;
struct si_shader_selector *es = shader->key.ge.part.gs.es;
LLVMValueRef es_prolog = NULL;
LLVMValueRef es_main = NULL;
- LLVMValueRef gs_prolog = NULL;
LLVMValueRef gs_main = ctx.main_fn;
- /* GS prolog */
- union si_shader_part_key gs_prolog_key;
- memset(&gs_prolog_key, 0, sizeof(gs_prolog_key));
- gs_prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog;
- gs_prolog_key.gs_prolog.as_ngg = shader->key.ge.as_ngg;
- si_llvm_build_gs_prolog(&ctx, &gs_prolog_key);
- gs_prolog = ctx.main_fn;
-
/* ES main part */
struct si_shader shader_es = {};
shader_es.selector = es;
/* Prepare the array of shader parts. */
LLVMValueRef parts[4];
- unsigned num_parts = 0, main_part, next_first_part;
+ unsigned num_parts = 0, main_part;
if (es_prolog)
parts[num_parts++] = es_prolog;
parts[main_part = num_parts++] = es_main;
- parts[next_first_part = num_parts++] = gs_prolog;
parts[num_parts++] = gs_main;
- si_build_wrapper_function(&ctx, parts, num_parts, main_part, next_first_part, false);
+ si_build_wrapper_function(&ctx, parts, num_parts, main_part, main_part + 1, false);
} else {
- LLVMValueRef parts[2];
- union si_shader_part_key prolog_key;
-
- parts[1] = ctx.main_fn;
-
- memset(&prolog_key, 0, sizeof(prolog_key));
- prolog_key.gs_prolog.states = shader->key.ge.part.gs.prolog;
- si_llvm_build_gs_prolog(&ctx, &prolog_key);
- parts[0] = ctx.main_fn;
-
- si_build_wrapper_function(&ctx, parts, 2, 1, 0, false);
+ /* Nothing to do for gfx6-8. The shader has only 1 part and it's ctx.main_fn. */
}
} else if (shader->is_monolithic && ctx.stage == MESA_SHADER_FRAGMENT) {
si_llvm_build_monolithic_ps(&ctx, shader);
/* GFX9 has the ESGS ring in LDS. */
if (ctx->screen->info.chip_class >= GFX9) {
- unsigned index = vtx_offset_param;
- vtx_offset =
- si_unpack_param(ctx, ctx->args.gs_vtx_offset[index / 2], (index & 1) * 16, 16);
-
unsigned offset = param * 4 + swizzle;
- vtx_offset =
- LLVMBuildAdd(ctx->ac.builder, vtx_offset, LLVMConstInt(ctx->ac.i32, offset, false), "");
+
+ vtx_offset = LLVMBuildAdd(ctx->ac.builder, ctx->gs_vtx_offset[vtx_offset_param],
+ LLVMConstInt(ctx->ac.i32, offset, false), "");
LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->esgs_ring, vtx_offset);
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, ptr, "");
/* GFX6: input load from the ESGS ring in memory. */
/* Get the vertex offset parameter on GFX6. */
- LLVMValueRef gs_vtx_offset = ac_get_arg(&ctx->ac, ctx->args.gs_vtx_offset[vtx_offset_param]);
-
- vtx_offset = LLVMBuildMul(ctx->ac.builder, gs_vtx_offset, LLVMConstInt(ctx->ac.i32, 4, 0), "");
+ vtx_offset = LLVMBuildMul(ctx->ac.builder, ctx->gs_vtx_offset[vtx_offset_param],
+ LLVMConstInt(ctx->ac.i32, 4, 0), "");
soffset = LLVMConstInt(ctx->ac.i32, (param * 4 + swizzle) * 256, 0);
return shader;
}
-/**
- * Build the GS prolog function. Rotate the input vertices for triangle strips
- * with adjacency.
- */
-void si_llvm_build_gs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key)
-{
- unsigned num_sgprs, num_vgprs;
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMTypeRef returns[AC_MAX_ARGS];
- LLVMValueRef func, ret;
-
- memset(&ctx->args, 0, sizeof(ctx->args));
-
- if (ctx->screen->info.chip_class >= GFX9) {
- /* Other user SGPRs are not needed by GS. */
- num_sgprs = 8 + SI_NUM_VS_STATE_RESOURCE_SGPRS;
- num_vgprs = 5; /* ES inputs are not needed by GS */
- } else {
- num_sgprs = GFX6_GS_NUM_USER_SGPR + 2;
- num_vgprs = 8;
- }
-
- for (unsigned i = 0; i < num_sgprs; ++i) {
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- returns[i] = ctx->ac.i32;
- }
-
- for (unsigned i = 0; i < num_vgprs; ++i) {
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL);
- returns[num_sgprs + i] = ctx->ac.f32;
- }
-
- /* Create the function. */
- si_llvm_create_func(ctx, "gs_prolog", returns, num_sgprs + num_vgprs, 0);
- func = ctx->main_fn;
-
- /* Copy inputs to outputs. This should be no-op, as the registers match,
- * but it will prevent the compiler from overwriting them unintentionally.
- */
- ret = ctx->return_value;
- for (unsigned i = 0; i < num_sgprs; i++) {
- LLVMValueRef p = LLVMGetParam(func, i);
- ret = LLVMBuildInsertValue(builder, ret, p, i, "");
- }
- for (unsigned i = 0; i < num_vgprs; i++) {
- LLVMValueRef p = LLVMGetParam(func, num_sgprs + i);
- p = ac_to_float(&ctx->ac, p);
- ret = LLVMBuildInsertValue(builder, ret, p, num_sgprs + i, "");
- }
-
- if (key->gs_prolog.states.tri_strip_adj_fix) {
- /* Remap the input vertices for every other primitive. */
- const struct ac_arg gfx6_vtx_params[6] = {
- {.used = true, .arg_index = num_sgprs}, {.used = true, .arg_index = num_sgprs + 1},
- {.used = true, .arg_index = num_sgprs + 3}, {.used = true, .arg_index = num_sgprs + 4},
- {.used = true, .arg_index = num_sgprs + 5}, {.used = true, .arg_index = num_sgprs + 6},
- };
- const struct ac_arg gfx9_vtx_params[3] = {
- {.used = true, .arg_index = num_sgprs},
- {.used = true, .arg_index = num_sgprs + 1},
- {.used = true, .arg_index = num_sgprs + 4},
- };
- LLVMValueRef vtx_in[6], vtx_out[6];
- LLVMValueRef prim_id, rotate;
-
- if (ctx->screen->info.chip_class >= GFX9) {
- for (unsigned i = 0; i < 3; i++) {
- vtx_in[i * 2] = si_unpack_param(ctx, gfx9_vtx_params[i], 0, 16);
- vtx_in[i * 2 + 1] = si_unpack_param(ctx, gfx9_vtx_params[i], 16, 16);
- }
- } else {
- for (unsigned i = 0; i < 6; i++)
- vtx_in[i] = ac_get_arg(&ctx->ac, gfx6_vtx_params[i]);
- }
-
- prim_id = LLVMGetParam(func, num_sgprs + 2);
- rotate = LLVMBuildTrunc(builder, prim_id, ctx->ac.i1, "");
-
- for (unsigned i = 0; i < 6; ++i) {
- LLVMValueRef base, rotated;
- base = vtx_in[i];
- rotated = vtx_in[(i + 4) % 6];
- vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, "");
- }
-
- if (ctx->screen->info.chip_class >= GFX9) {
- for (unsigned i = 0; i < 3; i++) {
- LLVMValueRef hi, out;
-
- hi = LLVMBuildShl(builder, vtx_out[i * 2 + 1], LLVMConstInt(ctx->ac.i32, 16, 0), "");
- out = LLVMBuildOr(builder, vtx_out[i * 2], hi, "");
- out = ac_to_float(&ctx->ac, out);
- ret = LLVMBuildInsertValue(builder, ret, out, gfx9_vtx_params[i].arg_index, "");
- }
- } else {
- for (unsigned i = 0; i < 6; i++) {
- LLVMValueRef out;
-
- out = ac_to_float(&ctx->ac, vtx_out[i]);
- ret = LLVMBuildInsertValue(builder, ret, out, gfx6_vtx_params[i].arg_index, "");
- }
- }
- }
-
- LLVMBuildRet(builder, ret);
-}
-
void si_llvm_init_gs_callbacks(struct si_shader_context *ctx)
{
ctx->abi.load_inputs = si_nir_load_input_gs;
bool gs_tri_strip_adj_fix =
!HAS_TESS && prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY;
- if (gs_tri_strip_adj_fix != sctx->shader.gs.key.ge.part.gs.prolog.tri_strip_adj_fix) {
- sctx->shader.gs.key.ge.part.gs.prolog.tri_strip_adj_fix = gs_tri_strip_adj_fix;
+ if (gs_tri_strip_adj_fix != sctx->shader.gs.key.ge.mono.u.gs_tri_strip_adj_fix) {
+ sctx->shader.gs.key.ge.mono.u.gs_tri_strip_adj_fix = gs_tri_strip_adj_fix;
sctx->do_update_shaders = true;
}
}