assert(sel->info.stage == MESA_SHADER_VERTEX ||
(sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.as_es));
- LLVMValueRef position[4] = {};
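+ /* es_vtxptr is this thread's pre-compaction vertex slot in LDS.
+  * pos_index remembers which output holds VARYING_SLOT_POS, so that the
+  * position can be reloaded and stored again after vertex compaction.
+  */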
+ LLVMValueRef es_vtxptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
+ unsigned pos_index = 0;
+
for (unsigned i = 0; i < info->num_outputs; i++) {
+ LLVMValueRef position[4];
+
switch (info->output_semantic[i]) {
case VARYING_SLOT_POS:
+ pos_index = i;
for (unsigned j = 0; j < 4; j++) {
position[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], "");
}
+
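+ /* The culling code needs W (to handle vertices behind the viewer, where
+  * XY/W is invalid) and the screen-space XY/W (for face and
+  * small-primitive culling), so stash both in LDS.
+  */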
+ /* Store Position.W into LDS. */
+ LLVMBuildStore(
+ builder, ac_to_integer(&ctx->ac, position[3]),
+ ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_pos_w, 0)));
+
+ /* Store Position.XY / W into LDS. */
+ for (unsigned chan = 0; chan < 2; chan++) {
+ LLVMValueRef val = ac_build_fdiv(&ctx->ac, position[chan], position[3]);
+ LLVMBuildStore(
+ builder, ac_to_integer(&ctx->ac, val),
+ ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_pos_x_div_w + chan, 0)));
+ }
break;
}
}
- assert(position[0]);
-
- /* Store Position.XYZW into LDS. */
- LLVMValueRef es_vtxptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
- for (unsigned chan = 0; chan < 4; chan++) {
- LLVMBuildStore(
- builder, ac_to_integer(&ctx->ac, position[chan]),
- ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_pos_x + chan, 0)));
- }
- /* Store Position.XY / W into LDS. */
- for (unsigned chan = 0; chan < 2; chan++) {
- LLVMValueRef val = ac_build_fdiv(&ctx->ac, position[chan], position[3]);
- LLVMBuildStore(
- builder, ac_to_integer(&ctx->ac, val),
- ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_pos_x_div_w + chan, 0)));
- }
/* Store VertexID and InstanceID. ES threads will have to load them
* from LDS after vertex compaction and use them instead of their own
* arguments.
*/
{
LLVMValueRef old_id = get_thread_id_in_tg(ctx);
LLVMValueRef new_id = ac_prefix_bitcount_2x64(&ctx->ac, es_mask, old_id);
+ LLVMValueRef new_vtx = ngg_nogs_vertex_ptr(ctx, new_id);
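+ /* Remember the mapping between pre- and post-compaction thread IDs:
+  * byte0 of the new vertex slot receives the old thread ID, while byte1
+  * of the old slot receives the new thread ID.
+  */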
LLVMBuildStore(
builder, LLVMBuildTrunc(builder, old_id, ctx->ac.i8, ""),
- si_build_gep_i8(ctx, ngg_nogs_vertex_ptr(ctx, new_id), lds_byte0_old_thread_id));
+ si_build_gep_i8(ctx, new_vtx, lds_byte0_old_thread_id));
LLVMBuildStore(builder, LLVMBuildTrunc(builder, new_id, ctx->ac.i8, ""),
si_build_gep_i8(ctx, es_vtxptr, lds_byte1_new_thread_id));
+
+ /* Store Position.XYZW into LDS. */
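+ /* The position is written at the post-compaction slot, so that the
+  * final vertex export can load it using its own thread ID.
+  */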
+ for (unsigned chan = 0; chan < 4; chan++) {
+ LLVMBuildStore(
+ builder, ac_to_integer(&ctx->ac, LLVMBuildLoad(builder, addrs[4 * pos_index + chan], "")),
+ ac_build_gep0(&ctx->ac, new_vtx, LLVMConstInt(ctx->ac.i32, lds_pos_x + chan, 0)));
+ }
}
ac_build_endif(&ctx->ac, 16009);
if (num_vgprs == 3)
vgpr++;
}
- /* Return the old thread ID. */
- val = LLVMBuildLoad(builder, old_thread_id, "");
- ret = LLVMBuildInsertValue(builder, ret, ac_to_float(&ctx->ac, val), vgpr++, "");
/* These two also use LDS. */
if (sel->info.writes_edgeflag ||
*/
if (info->output_semantic[i] == VARYING_SLOT_POS &&
ctx->shader->key.opt.ngg_culling) {
- vertex_ptr = ngg_nogs_vertex_ptr(ctx, ac_get_arg(&ctx->ac, ctx->ngg_old_thread_id));
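+ /* Compaction stored the position at the new thread ID, so each thread
+  * can read it back at its own index instead of going through the old
+  * thread ID.
+  */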
+ vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
for (unsigned j = 0; j < 4; j++) {
tmp = LLVMConstInt(ctx->ac.i32, lds_pos_x + j, 0);
}
}
-static void declare_vs_input_vgprs(struct si_shader_context *ctx, unsigned *num_prolog_vgprs,
- bool ngg_cull_shader)
+static void declare_vs_input_vgprs(struct si_shader_context *ctx, unsigned *num_prolog_vgprs)
{
struct si_shader *shader = ctx->shader;
}
if (!shader->is_gs_copy_shader) {
- if (shader->key.opt.ngg_culling && !ngg_cull_shader) {
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->ngg_old_thread_id);
- }
-
/* Vertex load indices. */
if (shader->selector->info.num_inputs) {
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->vertex_index0);
}
}
-static void declare_tes_input_vgprs(struct si_shader_context *ctx, bool ngg_cull_shader)
+static void declare_tes_input_vgprs(struct si_shader_context *ctx)
{
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->tes_u);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->tes_v);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->tes_rel_patch_id);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tes_patch_id);
-
- if (ctx->shader->key.opt.ngg_culling && !ngg_cull_shader) {
- ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->ngg_old_thread_id);
- }
}
enum
declare_vs_blit_inputs(ctx, shader->selector->info.base.vs.blit_sgprs_amd);
/* VGPRs */
- declare_vs_input_vgprs(ctx, &num_prolog_vgprs, ngg_cull_shader);
+ declare_vs_input_vgprs(ctx, &num_prolog_vgprs);
break;
}
}
/* VGPRs */
- declare_vs_input_vgprs(ctx, &num_prolog_vgprs, ngg_cull_shader);
+ declare_vs_input_vgprs(ctx, &num_prolog_vgprs);
/* Return values */
if (shader->key.opt.vs_as_prim_discard_cs) {
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_rel_ids);
if (ctx->stage == MESA_SHADER_VERTEX) {
- declare_vs_input_vgprs(ctx, &num_prolog_vgprs, ngg_cull_shader);
+ declare_vs_input_vgprs(ctx, &num_prolog_vgprs);
/* LS return values are inputs to the TCS main shader part. */
for (i = 0; i < 8 + GFX9_TCS_NUM_USER_SGPR; i++)
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx45_offset);
if (ctx->stage == MESA_SHADER_VERTEX) {
- declare_vs_input_vgprs(ctx, &num_prolog_vgprs, ngg_cull_shader);
+ declare_vs_input_vgprs(ctx, &num_prolog_vgprs);
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
- declare_tes_input_vgprs(ctx, ngg_cull_shader);
+ declare_tes_input_vgprs(ctx);
}
if ((ctx->shader->key.as_es || ngg_cull_shader) &&
num_user_sgprs = GFX9_TESGS_NUM_USER_SGPR;
}
- /* The NGG cull shader has to return all 9 VGPRs + the old thread ID.
+ /* The NGG cull shader has to return all 9 VGPRs.
*
* The normal merged ESGS shader only has to return the 5 VGPRs
* for the GS stage.
*/
- num_vgprs = ngg_cull_shader ? 10 : 5;
+ num_vgprs = ngg_cull_shader ? 9 : 5;
/* ES return values are inputs to GS. */
for (i = 0; i < 8 + num_user_sgprs; i++)
}
/* VGPRs */
- declare_tes_input_vgprs(ctx, ngg_cull_shader);
+ declare_tes_input_vgprs(ctx);
break;
case MESA_SHADER_GEOMETRY:
!!(shader_out->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST);
key->vs_prolog.gs_fast_launch_tri_strip =
!!(shader_out->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP);
- } else {
- key->vs_prolog.has_ngg_cull_inputs = !!shader_out->key.opt.ngg_culling;
}
if (shader_out->selector->info.stage == MESA_SHADER_TESS_CTRL) {
unsigned as_es : 1;
unsigned as_ngg : 1;
unsigned as_prim_discard_cs : 1;
- unsigned has_ngg_cull_inputs : 1; /* from the NGG cull shader */
unsigned gs_fast_launch_tri_list : 1; /* for NGG culling */
unsigned gs_fast_launch_tri_strip : 1; /* for NGG culling */
/* Prologs for monolithic shaders shouldn't set EXEC. */
*/
struct ac_arg vs_state_bits;
struct ac_arg vs_blit_inputs;
- struct ac_arg ngg_old_thread_id; /* generated by the NGG cull shader */
/* HW VS */
struct ac_arg streamout_config;
struct ac_arg streamout_write_index;
int num_returns, i;
unsigned first_vs_vgpr = key->vs_prolog.num_merged_next_stage_vgprs;
unsigned num_input_vgprs =
- key->vs_prolog.num_merged_next_stage_vgprs + 4 + (key->vs_prolog.has_ngg_cull_inputs ? 1 : 0);
+ key->vs_prolog.num_merged_next_stage_vgprs + 4;
struct ac_arg input_sgpr_param[key->vs_prolog.num_input_sgprs];
struct ac_arg input_vgpr_param[10];
LLVMValueRef input_vgprs[10];