/* Message identifiers carried in the immediate of s_sendmsg.
 *
 * The values 2 and 3 are deliberately shared between two pairs of names:
 * the gs / gs_done pair applies to gfx6 through gfx10.3, while
 * hs_tessfactor / dealloc_vgprs apply to gfx11 and newer. Code that
 * decodes an immediate must therefore check the gfx level to know which
 * meaning is in effect (the s_sendmsg printer does this with
 * `gfx_level >= GFX11`).
 */
enum sendmsg {
sendmsg_none = 0,
- _sendmsg_gs = 2, /* gfx6 to gfx10.3 */
- _sendmsg_gs_done = 3, /* gfx6 to gfx10.3 */
+ sendmsg_gs = 2, /* gfx6 to gfx10.3 */
+ sendmsg_gs_done = 3, /* gfx6 to gfx10.3 */
sendmsg_hs_tessfactor = 2, /* gfx11+ */
sendmsg_dealloc_vgprs = 3, /* gfx11+ */
sendmsg_save_wave = 4, /* gfx8 to gfx10.3 */
sendmsg_rtn_mask = 0xff,
};
-inline sendmsg
-sendmsg_gs(bool cut, bool emit, unsigned stream)
-{
- assert(stream < 4);
- return (sendmsg)((unsigned)_sendmsg_gs | (cut << 4) | (emit << 5) | (stream << 8));
-}
-
-inline sendmsg
-sendmsg_gs_done(bool cut, bool emit, unsigned stream)
-{
- assert(stream < 4);
- return (sendmsg)((unsigned)_sendmsg_gs_done | (cut << 4) | (emit << 5) | (stream << 8));
-}
-
enum bperm_swiz {
bperm_b1_sign = 8,
bperm_b3_sign = 9,
break;
}
- case nir_intrinsic_emit_vertex_with_counter: {
- assert(ctx->stage.hw == HWStage::GS);
- unsigned stream = nir_intrinsic_stream_id(instr);
- bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(false, true, stream));
- break;
- }
- case nir_intrinsic_end_primitive_with_counter: {
- if (ctx->stage.hw != HWStage::NGG) {
- unsigned stream = nir_intrinsic_stream_id(instr);
- bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1,
- sendmsg_gs(true, false, stream));
- }
- break;
- }
case nir_intrinsic_sendmsg_amd: {
unsigned imm = nir_intrinsic_base(instr);
Temp m0_content = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), lanecount_to_mask(ctx, src));
break;
}
- case nir_intrinsic_alloc_vertices_and_primitives_amd: {
- assert(ctx->stage.hw == HWStage::NGG);
- Temp num_vertices = get_ssa_temp(ctx, instr->src[0].ssa);
- Temp num_primitives = get_ssa_temp(ctx, instr->src[1].ssa);
-
- /* Put the number of vertices and primitives into m0 for the GS_ALLOC_REQ */
- Temp tmp =
- bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc),
- num_primitives, Operand::c32(12u));
- tmp = bld.sop2(aco_opcode::s_or_b32, bld.m0(bld.def(s1)), bld.def(s1, scc),
- tmp, num_vertices);
-
- /* Request the SPI to allocate space for the primitives and vertices
- * that will be exported by the threadgroup.
- */
- bld.sopp(aco_opcode::s_sendmsg, bld.m0(tmp), -1, sendmsg_gs_alloc_req);
- break;
- }
case nir_intrinsic_gds_atomic_add_amd: {
Temp store_val = get_ssa_temp(ctx, instr->src[0].ssa);
Temp gds_addr = get_ssa_temp(ctx, instr->src[1].ssa);
bld.barrier(aco_opcode::p_barrier,
memory_sync_info(storage_shared, semantic_acqrel, scope), scope);
}
-
- if (ctx.stage == vertex_geometry_gs || ctx.stage == tess_eval_geometry_gs) {
- ctx.gs_wave_id = bld.pseudo(aco_opcode::p_extract, bld.def(s1, m0), bld.def(s1, scc),
- get_arg(&ctx, args->merged_wave_info), Operand::c32(2u),
- Operand::c32(8u), Operand::zero());
- }
- } else if (ctx.stage == geometry_gs)
- ctx.gs_wave_id = get_arg(&ctx, args->gs_wave_id);
+ }
visit_cf_list(&ctx, &func->body);
Temp arg_temps[AC_MAX_ARGS];
- /* GS inputs */
- Temp gs_wave_id;
-
/* VS output information */
bool export_clip_dists;
unsigned num_clip_distances;
}
case aco_opcode::s_sendmsg: {
unsigned id = imm & sendmsg_id_mask;
- static_assert(_sendmsg_gs == sendmsg_hs_tessfactor);
- static_assert(_sendmsg_gs_done == sendmsg_dealloc_vgprs);
+ static_assert(sendmsg_gs == sendmsg_hs_tessfactor);
+ static_assert(sendmsg_gs_done == sendmsg_dealloc_vgprs);
switch (id) {
case sendmsg_none: fprintf(output, " sendmsg(MSG_NONE)"); break;
- case _sendmsg_gs:
+ case sendmsg_gs:
if (gfx_level >= GFX11)
fprintf(output, " sendmsg(hs_tessfactor)");
else
fprintf(output, " sendmsg(gs%s%s, %u)", imm & 0x10 ? ", cut" : "",
imm & 0x20 ? ", emit" : "", imm >> 8);
break;
- case _sendmsg_gs_done:
+ case sendmsg_gs_done:
if (gfx_level >= GFX11)
fprintf(output, " sendmsg(dealloc_vgprs)");
else
is_done_sendmsg(amd_gfx_level gfx_level, const Instruction* instr)
{
if (gfx_level <= GFX10_3 && instr->opcode == aco_opcode::s_sendmsg)
- return (instr->sopp().imm & sendmsg_id_mask) == _sendmsg_gs_done;
+ return (instr->sopp().imm & sendmsg_id_mask) == sendmsg_gs_done;
return false;
}
nir_intrinsic_io_semantics(instr).high_16bits);
break;
}
- case nir_intrinsic_emit_vertex_with_counter: {
- unsigned stream = nir_intrinsic_stream_id(instr);
- LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
- ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs);
- break;
- }
- case nir_intrinsic_end_primitive:
- case nir_intrinsic_end_primitive_with_counter:
- ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
- break;
case nir_intrinsic_sendmsg_amd: {
unsigned imm = nir_intrinsic_base(instr);
LLVMValueRef m0_content = get_src(ctx, instr->src[0]);
case nir_intrinsic_load_workgroup_num_input_primitives_amd:
result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), 22, 9);
break;
- case nir_intrinsic_alloc_vertices_and_primitives_amd: {
- /* The caller should only call this conditionally for wave 0.
- *
- * Send GS Alloc Req message from the first wave of the group to SPI.
- * Message payload is:
- * - bits 0..10: vertices in group
- * - bits 12..22: primitives in group
- */
- LLVMValueRef vtx_cnt = get_src(ctx, instr->src[0]);
- LLVMValueRef prim_cnt = get_src(ctx, instr->src[1]);
- LLVMValueRef msg = LLVMBuildShl(ctx->ac.builder, prim_cnt,
- LLVMConstInt(ctx->ac.i32, 12, false), "");
- msg = LLVMBuildOr(ctx->ac.builder, msg, vtx_cnt, "");
- ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_ALLOC_REQ, msg);
- break;
- }
case nir_intrinsic_overwrite_vs_arguments_amd:
ctx->abi->vertex_id_replaced = get_src(ctx, instr->src[0]);
ctx->abi->instance_id_replaced = get_src(ctx, instr->src[1]);
/* Varying -> attribute number mapping. Also NIR-only */
unsigned fs_input_attr_indices[MAX_VARYING];
- void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream);
-
- void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
- LLVMValueRef vertexidx, LLVMValueRef *addrs);
-
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
LLVMValueRef vertex_index, LLVMValueRef param_index,
unsigned driver_location, unsigned component,
}
}
-static void
-visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef vertexidx,
- LLVMValueRef *addrs)
-{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
- ctx->gs_wave_id);
-}
-
-static void
-visit_end_primitive(struct ac_shader_abi *abi, unsigned stream)
-{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),
- ctx->gs_wave_id);
-}
-
static LLVMValueRef
radv_load_base_vertex(struct ac_shader_abi *abi, bool non_indexed_is_zero)
{
ctx.shader = shaders[shader_idx];
ctx.output_mask = 0;
- if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && !ctx.shader_info->is_ngg) {
- ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
- ctx.abi.emit_primitive = visit_end_primitive;
- }
-
if (shader_idx && !(shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && info->is_ngg)) {
/* Execute a barrier before the second shader in
* a merged shader.
intrinsic("load_cull_small_prim_precision_amd", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER])
# Initial edge flags in a Vertex Shader, packed into the format the HW needs for primitive export.
intrinsic("load_initial_edgeflags_amd", src_comp=[], dest_comp=1, bit_sizes=[32], indices=[])
-# Allocates export space for vertices and primitives. src[] = {num_vertices, num_primitives}.
-intrinsic("alloc_vertices_and_primitives_amd", src_comp=[1, 1], indices=[])
# Corresponds to s_sendmsg in the GCN/RDNA ISA, src[] = { m0_content }, BASE = imm
intrinsic("sendmsg_amd", src_comp=[1], indices=[BASE])
# Overwrites VS input registers, for use with vertex compaction after culling. src = {vertex_id, instance_id}.
void si_preload_esgs_ring(struct si_shader_context *ctx);
void si_preload_gs_rings(struct si_shader_context *ctx);
void si_llvm_gs_build_end(struct si_shader_context *ctx);
-void si_llvm_init_gs_callbacks(struct si_shader_context *ctx);
/* si_shader_llvm_tess.c */
LLVMValueRef si_get_rel_patch_id(struct si_shader_context *ctx);
break;
case MESA_SHADER_GEOMETRY:
- si_llvm_init_gs_callbacks(ctx);
-
if (ctx->shader->key.ge.as_ngg) {
LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
ctx->gs_ngg_scratch = (struct ac_llvm_pointer) {
si_set_es_return_value_for_gs(ctx);
}
-static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
-{
- if (ctx->screen->info.gfx_level >= GFX9)
- return si_unpack_param(ctx, ctx->args->ac.merged_wave_info, 16, 8);
- else
- return ac_get_arg(&ctx->ac, ctx->args->ac.gs_wave_id);
-}
-
/* Finish building the geometry shader.
 *
 * On GFX9 and newer this calls ac_build_endif() with
 * ctx->merged_wrap_if_label; on older generations the visible code does
 * nothing.
 * NOTE(review): presumably this closes the conditional that wraps the GS
 * part of a merged shader -- confirm against the code that opens
 * merged_wrap_if_label.
 */
void si_llvm_gs_build_end(struct si_shader_context *ctx)
{
if (ctx->screen->info.gfx_level >= GFX9)
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
}
-/* Emit one vertex from the geometry shader */
-static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream,
- LLVMValueRef vertexidx, LLVMValueRef *addrs)
-{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-
- assert(!ctx->shader->key.ge.as_ngg);
-
- /* Signal vertex emission */
- ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
- si_get_gs_wave_id(ctx));
-}
-
-/* Cut one primitive from the geometry shader */
-static void si_llvm_emit_primitive(struct ac_shader_abi *abi, unsigned stream)
-{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-
- assert(!ctx->shader->key.ge.as_ngg);
-
- /* Signal primitive cut */
- ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),
- si_get_gs_wave_id(ctx));
-}
-
void si_preload_esgs_ring(struct si_shader_context *ctx)
{
LLVMBuilderRef builder = ctx->ac.builder;
ctx->gsvs_ring[stream] = ring;
}
}
-
-void si_llvm_init_gs_callbacks(struct si_shader_context *ctx)
-{
- ctx->abi.emit_vertex_with_counter = si_llvm_emit_vertex;
- ctx->abi.emit_primitive = si_llvm_emit_primitive;
-}