From 0062bb04accb7fdd6174b45b7db8c0fb962bdf02 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Timur=20Krist=C3=B3f?= Date: Fri, 28 Feb 2020 16:06:19 +0100 Subject: [PATCH] aco: Refactor load_per_vertex_input in preparation for tessellation. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This commit carves out the GS per-vertex input load, and takes the new helper functions into use. Signed-off-by: Timur Kristóf Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 118 ++++++++++--------------- 1 file changed, 47 insertions(+), 71 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 4b131bc..d535b86 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3679,21 +3679,21 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr) } } -void visit_load_per_vertex_input(isel_context *ctx, nir_intrinsic_instr *instr) +std::pair get_gs_per_vertex_input_offset(isel_context *ctx, nir_intrinsic_instr *instr, unsigned base_stride = 1u) { - assert(ctx->stage == vertex_geometry_gs || ctx->stage == geometry_gs); assert(ctx->shader->info.stage == MESA_SHADER_GEOMETRY); Builder bld(ctx->program, ctx->block); - Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); + nir_src *vertex_src = nir_get_io_vertex_index_src(instr); + Temp vertex_offset; - Temp offset = Temp(); - if (instr->src[0].ssa->parent_instr->type != nir_instr_type_load_const) { + if (!nir_src_is_const(*vertex_src)) { /* better code could be created, but this case probably doesn't happen * much in practice */ - Temp indirect_vertex = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa)); + Temp indirect_vertex = as_vgpr(ctx, get_ssa_temp(ctx, vertex_src->ssa)); for (unsigned i = 0; i < ctx->shader->info.gs.vertices_in; i++) { Temp elem; + if (ctx->stage == vertex_geometry_gs) { elem = get_arg(ctx, ctx->args->gs_vtx_offset[i / 2u * 2u]); if (i % 2u) @@ -3701,86 +3701,62 @@ void visit_load_per_vertex_input(isel_context *ctx, nir_intrinsic_instr *instr) } else { elem = get_arg(ctx, ctx->args->gs_vtx_offset[i]); } - if (offset.id()) { - Temp cond = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.hint_vcc(bld.def(s2)), + + if (vertex_offset.id()) { + Temp cond = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.hint_vcc(bld.def(bld.lm)), Operand(i), indirect_vertex); - offset = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), offset, elem, cond); + vertex_offset = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), vertex_offset, elem, cond); } else { - offset = elem; + vertex_offset = elem; } } + if (ctx->stage == vertex_geometry_gs) - offset = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0xffffu), offset); + vertex_offset = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0xffffu), vertex_offset); } else { - unsigned vertex = nir_src_as_uint(instr->src[0]); + unsigned vertex = nir_src_as_uint(*vertex_src); if (ctx->stage == vertex_geometry_gs) - offset = bld.vop3( - aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->gs_vtx_offset[vertex / 2u * 2u]), - Operand((vertex % 2u) * 16u), Operand(16u)); + vertex_offset = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), + get_arg(ctx, ctx->args->gs_vtx_offset[vertex / 2u * 2u]), + Operand((vertex % 2u) * 16u), Operand(16u)); else - offset = get_arg(ctx, ctx->args->gs_vtx_offset[vertex]); + vertex_offset = get_arg(ctx, ctx->args->gs_vtx_offset[vertex]); } - unsigned const_offset = nir_intrinsic_base(instr); - const_offset += nir_intrinsic_component(instr); - - nir_instr *off_instr = instr->src[1].ssa->parent_instr; - if (off_instr->type != nir_instr_type_load_const) { - Temp indirect_offset = get_ssa_temp(ctx, instr->src[1].ssa); - offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), - bld.vadd32(bld.def(v1), indirect_offset, offset)); - } else { - const_offset += nir_instr_as_load_const(off_instr)->value[0].u32 * 4u; - } - const_offset *= 4u; + std::pair offs = get_intrinsic_io_basic_offset(ctx, instr, base_stride); + offs = offset_add(ctx, offs, std::make_pair(vertex_offset, 0u)); + return offset_mul(ctx, offs, 4u); +} - offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), offset); +void visit_load_gs_per_vertex_input(isel_context *ctx, nir_intrinsic_instr *instr) +{ + assert(ctx->shader->info.stage == MESA_SHADER_GEOMETRY); + Builder bld(ctx->program, ctx->block); + Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); unsigned elem_size_bytes = instr->dest.ssa.bit_size / 8; - if (ctx->stage == geometry_gs) { - Temp esgs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_ESGS_GS * 16u)); - - const_offset *= ctx->program->wave_size; - std::array elems; - aco_ptr vec{create_instruction( - aco_opcode::p_create_vector, Format::PSEUDO, instr->dest.ssa.num_components, 1)}; - for (unsigned i = 0; i < instr->dest.ssa.num_components; i++) { - Temp subelems[2]; - for (unsigned j = 0; j < elem_size_bytes / 4; j++) { - Operand soffset(0u); - if (const_offset >= 4096u) - soffset = bld.copy(bld.def(s1), Operand(const_offset / 4096u * 4096u)); - - aco_ptr mubuf{create_instruction(aco_opcode::buffer_load_dword, Format::MUBUF, 3, 1)}; - mubuf->definitions[0] = bld.def(v1); - subelems[j] = mubuf->definitions[0].getTemp(); - mubuf->operands[0] = Operand(esgs_ring); - mubuf->operands[1] = Operand(offset); - mubuf->operands[2] = Operand(soffset); - mubuf->offen = true; - mubuf->offset = const_offset % 4096u; - mubuf->glc = true; - mubuf->dlc = ctx->options->chip_class >= GFX10; - mubuf->barrier = barrier_none; - mubuf->can_reorder = true; - bld.insert(std::move(mubuf)); - - const_offset += ctx->program->wave_size * 4u; - } - - if (elem_size_bytes == 4) - elems[i] = subelems[0]; - else - elems[i] = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), subelems[0], subelems[1]); - vec->operands[i] = Operand(elems[i]); - } - vec->definitions[0] = Definition(dst); - ctx->block->instructions.emplace_back(std::move(vec)); - ctx->allocated_vec.emplace(dst.id(), elems); + if (ctx->stage == geometry_gs) { + std::pair offs = get_gs_per_vertex_input_offset(ctx, instr, ctx->program->wave_size); + Temp ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_ESGS_GS * 16u)); + load_vmem_mubuf(ctx, dst, ring, offs.first, Temp(), offs.second, elem_size_bytes, instr->dest.ssa.num_components, 4u * ctx->program->wave_size, false, true); + } else if (ctx->stage == vertex_geometry_gs) { + std::pair offs = get_gs_per_vertex_input_offset(ctx, instr); + unsigned lds_align = calculate_lds_alignment(ctx, offs.second); + load_lds(ctx, elem_size_bytes, dst, offs.first, offs.second, lds_align); } else { - unsigned align = calculate_lds_alignment(ctx, const_offset); - load_lds(ctx, elem_size_bytes, dst, offset, const_offset, align); + unreachable("Unsupported GS stage."); + } +} + +void visit_load_per_vertex_input(isel_context *ctx, nir_intrinsic_instr *instr) +{ + switch (ctx->shader->info.stage) { + case MESA_SHADER_GEOMETRY: + visit_load_gs_per_vertex_input(ctx, instr); + break; + default: + unreachable("Unimplemented shader stage"); } } -- 2.7.4