if (gfx10_ngg_export_prim_early(shader))
gfx10_ngg_build_export_prim(ctx, NULL, LLVMBuildLoad(builder, new_vgpr0, ""));
- /* Set the new ES input VGPRs. */
- LLVMValueRef es_data[4];
-
- for (unsigned i = 0; i < 4; i++)
- es_data[i] = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, "");
-
- ac_build_ifcc(&ctx->ac, LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, tid, new_num_es_threads, ""),
- 16012);
- {
- LLVMValueRef tmp;
-
- for (unsigned i = 0; i < 2; i++) {
- tmp = LLVMBuildLoad(
- builder,
- ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_vertex_id + i, 0)),
- "");
- LLVMBuildStore(builder, tmp, es_data[i]);
- }
-
- if (ctx->stage == MESA_SHADER_TESS_EVAL) {
- tmp = LLVMBuildLoad(builder,
- si_build_gep_i8(ctx, es_vtxptr, lds_byte2_tes_rel_patch_id), "");
- tmp = LLVMBuildZExt(builder, tmp, ctx->ac.i32, "");
- LLVMBuildStore(builder, tmp, es_data[2]);
-
- if (uses_tes_prim_id) {
- tmp = LLVMBuildLoad(builder,
- ac_build_gep0(&ctx->ac, es_vtxptr,
- LLVMConstInt(ctx->ac.i32, lds_tes_patch_id, 0)),
- "");
- LLVMBuildStore(builder, tmp, es_data[3]);
- }
+ /* Prepare LDS addresses of the new ES input VGPRs. */
+ LLVMValueRef input_vgpr_addresses[4] = {
+ ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_vertex_id, 0)),
+ ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_instance_id, 0)),
+ };
+ if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+ input_vgpr_addresses[2] = si_build_gep_i8(ctx, es_vtxptr, lds_byte2_tes_rel_patch_id);
+ if (uses_tes_prim_id) {
+ input_vgpr_addresses[3] = ac_build_gep0(&ctx->ac, es_vtxptr,
+ LLVMConstInt(ctx->ac.i32, lds_tes_patch_id, 0));
}
}
- ac_build_endif(&ctx->ac, 16012);
/* Return values for the main function. */
LLVMValueRef ret = ctx->return_value;
ret = si_insert_input_ret_float(ctx, ret, ctx->args.gs_invocation_id, vgpr++);
vgpr++; /* gs_vtx45_offset */
+ /* Set the input VPGRs to the corresponding LDS addresses where the VGPR values are
+ * stored. The VS prolog will load them.
+ */
if (ctx->stage == MESA_SHADER_VERTEX) {
- val = LLVMBuildLoad(builder, es_data[0], "");
+ val = LLVMBuildPtrToInt(builder, input_vgpr_addresses[0], ctx->ac.i32, "");
ret = LLVMBuildInsertValue(builder, ret, ac_to_float(&ctx->ac, val), vgpr++,
""); /* VGPR5 - VertexID */
vgpr += 2;
if (uses_instance_id) {
- val = LLVMBuildLoad(builder, es_data[1], "");
+ val = LLVMBuildPtrToInt(builder, input_vgpr_addresses[1], ctx->ac.i32, "");
ret = LLVMBuildInsertValue(builder, ret, ac_to_float(&ctx->ac, val), vgpr++,
""); /* VGPR8 - InstanceID */
} else {
assert(ctx->stage == MESA_SHADER_TESS_EVAL);
unsigned num_vgprs = uses_tes_prim_id ? 4 : 3;
for (unsigned i = 0; i < num_vgprs; i++) {
- val = LLVMBuildLoad(builder, es_data[i], "");
+ val = LLVMBuildPtrToInt(builder, input_vgpr_addresses[i], ctx->ac.i32, "");
ret = LLVMBuildInsertValue(builder, ret, ac_to_float(&ctx->ac, val), vgpr++, "");
}
if (num_vgprs == 3)
if (shader->is_monolithic && ctx.stage == MESA_SHADER_VERTEX) {
LLVMValueRef parts[4];
unsigned num_parts = 0;
- bool has_prolog = false;
+ bool first_is_prolog = false;
LLVMValueRef main_fn = ctx.main_fn;
if (ngg_cull_main_fn) {
prolog_key.vs_prolog.is_monolithic = true;
si_llvm_build_vs_prolog(&ctx, &prolog_key);
parts[num_parts++] = ctx.main_fn;
- has_prolog = true;
+ first_is_prolog = true;
}
parts[num_parts++] = ngg_cull_main_fn;
}
prolog_key.vs_prolog.is_monolithic = true;
si_llvm_build_vs_prolog(&ctx, &prolog_key);
parts[num_parts++] = ctx.main_fn;
- has_prolog = true;
+ if (num_parts == 1)
+ first_is_prolog = true;
}
parts[num_parts++] = main_fn;
- si_build_wrapper_function(&ctx, parts, num_parts, has_prolog ? 1 : 0, 0, false);
+ si_build_wrapper_function(&ctx, parts, num_parts, first_is_prolog ? 1 : 0, 0, false);
if (ctx.shader->key.opt.vs_as_prim_discard_cs)
si_build_prim_discard_compute_shader(&ctx);
} else if (shader->is_monolithic && ctx.stage == MESA_SHADER_TESS_EVAL && ngg_cull_main_fn) {
- LLVMValueRef parts[2];
+ LLVMValueRef parts[3], prolog, main_fn = ctx.main_fn;
+
+ /* We reuse the VS prolog code for TES just to load the input VGPRs from LDS. */
+ union si_shader_part_key prolog_key;
+ memset(&prolog_key, 0, sizeof(prolog_key));
+ prolog_key.vs_prolog.num_input_sgprs = shader->info.num_input_sgprs;
+ prolog_key.vs_prolog.num_merged_next_stage_vgprs = 5;
+ prolog_key.vs_prolog.as_ngg = 1;
+ prolog_key.vs_prolog.load_vgprs_after_culling = 1;
+ prolog_key.vs_prolog.is_monolithic = true;
+ si_llvm_build_vs_prolog(&ctx, &prolog_key);
+ prolog = ctx.main_fn;
parts[0] = ngg_cull_main_fn;
- parts[1] = ctx.main_fn;
+ parts[1] = prolog;
+ parts[2] = main_fn;
- si_build_wrapper_function(&ctx, parts, 2, 0, 0, false);
+ si_build_wrapper_function(&ctx, parts, 3, 0, 0, false);
} else if (shader->is_monolithic && ctx.stage == MESA_SHADER_TESS_CTRL) {
if (sscreen->info.chip_class >= GFX9) {
struct si_shader_selector *ls = shader->key.part.tcs.ls;