From 8ec682433573ededbdae7f63490cabf5bdfaa89a Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Fri, 8 Oct 2021 16:14:15 +0200
Subject: [PATCH] radv,aco: decouple shader_info/options from radv_shader_args

Signed-off-by: Samuel Pitoiset
Reviewed-by: Bas Nieuwenhuizen
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp    |  58 ++--
 src/amd/compiler/aco_instruction_selection.h      |   2 +
 .../compiler/aco_instruction_selection_setup.cpp  |  54 ++--
 src/amd/compiler/aco_interface.cpp                |  48 ++--
 src/amd/compiler/aco_interface.h                  |  16 +-
 src/amd/compiler/aco_ir.h                         |  16 +-
 src/amd/vulkan/radv_nir_to_llvm.c                 | 292 +++++++++----------
 src/amd/vulkan/radv_private.h                     |   7 +-
 src/amd/vulkan/radv_shader.c                      |  20 +-
 src/amd/vulkan/radv_shader_args.c                 | 311 +++++++++++----------
 src/amd/vulkan/radv_shader_args.h                 |  11 +-
 11 files changed, 446 insertions(+), 389 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 5f1648e..690baf8 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5004,7 +5004,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
    nir_src offset = *nir_get_io_offset_src(instr);

-   if (ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->args->shader_info->vs.dynamic_inputs) {
+   if (ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->program->info->vs.dynamic_inputs) {
       if (!nir_src_is_const(offset) || nir_src_as_uint(offset))
          isel_err(offset.ssa->parent_instr,
                   "Unimplemented non-zero nir_intrinsic_load_input offset");
@@ -5530,12 +5530,12 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr)

    nir_const_value* index_cv = nir_src_as_const_value(instr->src[0]);

    if (index_cv && instr->dest.ssa.bit_size == 32) {
-      struct radv_userdata_info *loc =
-         &ctx->args->shader_info->user_sgprs_locs.shader_data[AC_UD_INLINE_PUSH_CONSTANTS];
+      const struct radv_userdata_info *loc =
+         &ctx->program->info->user_sgprs_locs.shader_data[AC_UD_INLINE_PUSH_CONSTANTS];
       unsigned start = (offset + index_cv->u32) / 4u;
       unsigned num_inline_push_consts = loc->sgpr_idx != -1 ?
loc->num_sgprs : 0; - start -= ctx->args->shader_info->min_push_constant_used / 4; + start -= ctx->program->info->min_push_constant_used / 4; if (start + count <= num_inline_push_consts) { std::array elems; aco_ptr vec{create_instruction( @@ -8841,7 +8841,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) ctx->shader->info.stage == MESA_SHADER_TESS_EVAL); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); - bld.copy(Definition(dst), Operand::c32(ctx->args->options->key.tcs.tess_input_vertices)); + bld.copy(Definition(dst), Operand::c32(ctx->options->key.tcs.tess_input_vertices)); break; } case nir_intrinsic_emit_vertex_with_counter: { @@ -11574,9 +11574,11 @@ ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt) void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders, - ac_shader_config* config, const struct radv_shader_args* args) + ac_shader_config* config, const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, + const struct radv_shader_args* args) { - isel_context ctx = setup_isel_context(program, shader_count, shaders, config, args, false); + isel_context ctx = setup_isel_context(program, shader_count, shaders, config, options, info, args, false); if_context ic_merged_wave_info; bool ngg_gs = ctx.stage.hw == HWStage::NGG && ctx.stage.has(SWStage::GS); @@ -11591,12 +11593,12 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const Pseudo_instruction* startpgm = add_startpgm(&ctx); append_logical_start(ctx.block); - if (unlikely(args->options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs)) + if (unlikely(ctx.options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs)) fix_ls_vgpr_init_bug(&ctx, startpgm); split_arguments(&ctx, startpgm); - if (!args->shader_info->vs.has_prolog && + if (!info->vs.has_prolog && (program->stage.has(SWStage::VS) || program->stage.has(SWStage::TES))) { Builder(ctx.program, ctx.block).sopp(aco_opcode::s_setprio, -1u, 0x3u); } @@ -11693,9 +11695,11 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const void select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_config* config, + const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, const struct radv_shader_args* args) { - isel_context ctx = setup_isel_context(program, 1, &gs_shader, config, args, true); + isel_context ctx = setup_isel_context(program, 1, &gs_shader, config, options, info, args, true); ctx.block->fp_mode = program->next_fp_mode; @@ -11708,7 +11712,7 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_ program->private_segment_buffer, Operand::c32(RING_GSVS_VS * 16u)); Operand stream_id = Operand::zero(); - if (args->shader_info->so.num_outputs) + if (program->info->so.num_outputs) stream_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), get_arg(&ctx, ctx.args->ac.streamout_config), Operand::c32(0x20018u)); @@ -11721,8 +11725,8 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_ if (stream_id.isConstant() && stream != stream_id.constantValue()) continue; - unsigned num_components = args->shader_info->gs.num_stream_output_components[stream]; - if (stream > 0 && (!num_components || !args->shader_info->so.num_outputs)) + unsigned num_components = program->info->gs.num_stream_output_components[stream]; + if (stream > 0 && (!num_components || !program->info->so.num_outputs)) 
continue; memset(ctx.outputs.mask, 0, sizeof(ctx.outputs.mask)); @@ -11737,17 +11741,17 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_ unsigned offset = 0; for (unsigned i = 0; i <= VARYING_SLOT_VAR31; ++i) { - if (args->shader_info->gs.output_streams[i] != stream) + if (program->info->gs.output_streams[i] != stream) continue; - unsigned output_usage_mask = args->shader_info->gs.output_usage_mask[i]; + unsigned output_usage_mask = program->info->gs.output_usage_mask[i]; unsigned length = util_last_bit(output_usage_mask); for (unsigned j = 0; j < length; ++j) { if (!(output_usage_mask & (1 << j))) continue; Temp val = bld.tmp(v1); - unsigned const_offset = offset * args->shader_info->gs.vertices_out * 16 * 4; + unsigned const_offset = offset * program->info->gs.vertices_out * 16 * 4; load_vmem_mubuf(&ctx, val, gsvs_ring, vtx_offset, Temp(), const_offset, 4, 1, 0u, true, true, true); @@ -11758,7 +11762,7 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_ } } - if (args->shader_info->so.num_outputs) { + if (program->info->so.num_outputs) { emit_streamout(&ctx, stream); bld.reset(ctx.block); } @@ -11790,17 +11794,19 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_ void select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shader_config* config, + const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, const struct radv_shader_args* args) { - assert(args->options->chip_class == GFX8); + assert(options->chip_class == GFX8); - init_program(program, compute_cs, args->shader_info, args->options->chip_class, - args->options->family, args->options->wgp_mode, config); + init_program(program, compute_cs, info, options->chip_class, + options->family, options->wgp_mode, config); isel_context ctx = {}; ctx.program = program; ctx.args = args; - ctx.options = args->options; + ctx.options = options; ctx.stage = program->stage; ctx.block = ctx.program->create_and_insert_block(); @@ -11952,16 +11958,18 @@ calc_nontrivial_instance_id(Builder& bld, const struct radv_shader_args* args, u void select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shader_config* config, + const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, const struct radv_shader_args* args, unsigned* num_preserved_sgprs) { assert(key->num_attributes > 0); /* This should be enough for any shader/stage. */ - unsigned max_user_sgprs = args->options->chip_class >= GFX9 ? 32 : 16; + unsigned max_user_sgprs = options->chip_class >= GFX9 ? 
32 : 16; *num_preserved_sgprs = max_user_sgprs + 14; - init_program(program, compute_cs, args->shader_info, args->options->chip_class, - args->options->family, args->options->wgp_mode, config); + init_program(program, compute_cs, info, options->chip_class, + options->family, options->wgp_mode, config); Block* block = program->create_and_insert_block(); block->kind = block_kind_top_level; @@ -12001,7 +12009,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers, s1), get_arg_fixed(args, args->ac.vertex_buffers)); bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers.advance(4), s1), - Operand::c32((unsigned)args->options->address32_hi)); + Operand::c32((unsigned)options->address32_hi)); /* calculate vgpr requirements */ unsigned num_vgprs = attributes_start.reg() - 256; diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h index 0898246..65f74b3 100644 --- a/src/amd/compiler/aco_instruction_selection.h +++ b/src/amd/compiler/aco_instruction_selection.h @@ -116,6 +116,8 @@ void cleanup_context(isel_context* ctx); isel_context setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* const* shaders, ac_shader_config* config, + const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, const struct radv_shader_args* args, bool is_gs_copy_shader); } // namespace aco diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index d593dd4..8ad3a51 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -273,7 +273,7 @@ setup_vs_variables(isel_context* ctx, nir_shader* nir) /* TODO: NGG streamout */ if (ctx->stage.hw == HWStage::NGG) - assert(!ctx->args->shader_info->so.num_outputs); + assert(!ctx->program->info->so.num_outputs); } if (ctx->stage == vertex_ngg) { @@ -301,23 +301,23 @@ setup_gs_variables(isel_context* ctx, nir_shader* nir) void setup_tcs_info(isel_context* ctx, nir_shader* nir, nir_shader* vs) { - ctx->tcs_in_out_eq = ctx->args->shader_info->vs.tcs_in_out_eq; - ctx->tcs_temp_only_inputs = ctx->args->shader_info->vs.tcs_temp_only_input_mask; - ctx->tcs_num_patches = ctx->args->shader_info->num_tess_patches; - ctx->program->config->lds_size = ctx->args->shader_info->tcs.num_lds_blocks; + ctx->tcs_in_out_eq = ctx->program->info->vs.tcs_in_out_eq; + ctx->tcs_temp_only_inputs = ctx->program->info->vs.tcs_temp_only_input_mask; + ctx->tcs_num_patches = ctx->program->info->num_tess_patches; + ctx->program->config->lds_size = ctx->program->info->tcs.num_lds_blocks; } void setup_tes_variables(isel_context* ctx, nir_shader* nir) { - ctx->tcs_num_patches = ctx->args->shader_info->num_tess_patches; + ctx->tcs_num_patches = ctx->program->info->num_tess_patches; if (ctx->stage == tess_eval_vs || ctx->stage == tess_eval_ngg) { setup_vs_output_info(ctx, nir, &ctx->program->info->tes.outinfo); /* TODO: NGG streamout */ if (ctx->stage.hw == HWStage::NGG) - assert(!ctx->args->shader_info->so.num_outputs); + assert(!ctx->program->info->so.num_outputs); } if (ctx->stage == tess_eval_ngg) { @@ -388,9 +388,9 @@ init_context(isel_context* ctx, nir_shader* shader) ctx->range_ht = _mesa_pointer_hash_table_create(NULL); ctx->ub_config.min_subgroup_size = 64; ctx->ub_config.max_subgroup_size = 64; - if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && 
ctx->args->shader_info->cs.subgroup_size) { - ctx->ub_config.min_subgroup_size = ctx->args->shader_info->cs.subgroup_size; - ctx->ub_config.max_subgroup_size = ctx->args->shader_info->cs.subgroup_size; + if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && ctx->program->info->cs.subgroup_size) { + ctx->ub_config.min_subgroup_size = ctx->program->info->cs.subgroup_size; + ctx->ub_config.max_subgroup_size = ctx->program->info->cs.subgroup_size; } ctx->ub_config.max_workgroup_invocations = 2048; ctx->ub_config.max_workgroup_count[0] = 65535; @@ -797,8 +797,8 @@ init_context(isel_context* ctx, nir_shader* shader) } } - ctx->program->config->spi_ps_input_ena = ctx->args->shader_info->ps.spi_ps_input; - ctx->program->config->spi_ps_input_addr = ctx->args->shader_info->ps.spi_ps_input; + ctx->program->config->spi_ps_input_ena = ctx->program->info->ps.spi_ps_input; + ctx->program->config->spi_ps_input_addr = ctx->program->info->ps.spi_ps_input; ctx->cf_info.nir_to_aco = std::move(nir_to_aco); @@ -819,7 +819,9 @@ cleanup_context(isel_context* ctx) isel_context setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* const* shaders, - ac_shader_config* config, const struct radv_shader_args* args, bool is_gs_copy_shader) + ac_shader_config* config, const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, + const struct radv_shader_args* args, bool is_gs_copy_shader) { SWStage sw_stage = SWStage::None; for (unsigned i = 0; i < shader_count; i++) { @@ -835,12 +837,12 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c default: unreachable("Shader stage not implemented"); } } - bool gfx9_plus = args->options->chip_class >= GFX9; - bool ngg = args->shader_info->is_ngg && args->options->chip_class >= GFX10; + bool gfx9_plus = options->chip_class >= GFX9; + bool ngg = info->is_ngg && options->chip_class >= GFX10; HWStage hw_stage{}; - if (sw_stage == SWStage::VS && args->shader_info->vs.as_es && !ngg) + if (sw_stage == SWStage::VS && info->vs.as_es && !ngg) hw_stage = HWStage::ES; - else if (sw_stage == SWStage::VS && !args->shader_info->vs.as_ls && !ngg) + else if (sw_stage == SWStage::VS && !info->vs.as_ls && !ngg) hw_stage = HWStage::VS; else if (sw_stage == SWStage::VS && ngg) hw_stage = HWStage::NGG; /* GFX10/NGG: VS without GS uses the HW GS stage */ @@ -856,17 +858,17 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c hw_stage = HWStage::GS; /* GFX6-9: VS+GS merged into a GS (and GFX10/legacy) */ else if (sw_stage == SWStage::VS_GS && ngg) hw_stage = HWStage::NGG; /* GFX10+: VS+GS merged into an NGG GS */ - else if (sw_stage == SWStage::VS && args->shader_info->vs.as_ls) + else if (sw_stage == SWStage::VS && info->vs.as_ls) hw_stage = HWStage::LS; /* GFX6-8: VS is a Local Shader, when tessellation is used */ else if (sw_stage == SWStage::TCS) hw_stage = HWStage::HS; /* GFX6-8: TCS is a Hull Shader */ else if (sw_stage == SWStage::VS_TCS) hw_stage = HWStage::HS; /* GFX9-10: VS+TCS merged into a Hull Shader */ - else if (sw_stage == SWStage::TES && !args->shader_info->tes.as_es && !ngg) + else if (sw_stage == SWStage::TES && !info->tes.as_es && !ngg) hw_stage = HWStage::VS; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */ - else if (sw_stage == SWStage::TES && !args->shader_info->tes.as_es && ngg) + else if (sw_stage == SWStage::TES && !info->tes.as_es && ngg) hw_stage = HWStage::NGG; /* GFX10/NGG: TES without GS */ - else if (sw_stage == SWStage::TES && 
args->shader_info->tes.as_es && !ngg) + else if (sw_stage == SWStage::TES && info->tes.as_es && !ngg) hw_stage = HWStage::ES; /* GFX6-8: TES is an Export Shader */ else if (sw_stage == SWStage::TES_GS && gfx9_plus && !ngg) hw_stage = HWStage::GS; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */ @@ -875,16 +877,16 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c else unreachable("Shader stage not implemented"); - init_program(program, Stage{hw_stage, sw_stage}, args->shader_info, args->options->chip_class, - args->options->family, args->options->wgp_mode, config); + init_program(program, Stage{hw_stage, sw_stage}, info, options->chip_class, + options->family, options->wgp_mode, config); isel_context ctx = {}; ctx.program = program; ctx.args = args; - ctx.options = args->options; + ctx.options = options; ctx.stage = program->stage; - program->workgroup_size = args->shader_info->workgroup_size; + program->workgroup_size = program->info->workgroup_size; assert(program->workgroup_size); if (ctx.stage == tess_control_hs) @@ -897,7 +899,7 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c unsigned scratch_size = 0; if (program->stage == gs_copy_vs) { assert(shader_count == 1); - setup_vs_output_info(&ctx, shaders[0], &args->shader_info->vs.outinfo); + setup_vs_output_info(&ctx, shaders[0], &program->info->vs.outinfo); } else { for (unsigned i = 0; i < shader_count; i++) { nir_shader* nir = shaders[i]; diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp index d076f9f..6cdd74d 100644 --- a/src/amd/compiler/aco_interface.cpp +++ b/src/amd/compiler/aco_interface.cpp @@ -74,29 +74,32 @@ validate(aco::Program* program) } void -aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders, - struct radv_shader_binary** binary, const struct radv_shader_args* args) +aco_compile_shader(const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, + unsigned shader_count, struct nir_shader* const* shaders, + const struct radv_shader_args *args, + struct radv_shader_binary** binary) { aco::init(); ac_shader_config config = {0}; std::unique_ptr program{new aco::Program}; - program->collect_statistics = args->options->record_stats; + program->collect_statistics = options->record_stats; if (program->collect_statistics) memset(program->statistics, 0, sizeof(program->statistics)); - program->debug.func = args->options->debug.func; - program->debug.private_data = args->options->debug.private_data; + program->debug.func = options->debug.func; + program->debug.private_data = options->debug.private_data; /* Instruction Selection */ if (args->is_gs_copy_shader) - aco::select_gs_copy_shader(program.get(), shaders[0], &config, args); + aco::select_gs_copy_shader(program.get(), shaders[0], &config, options, info, args); else if (args->is_trap_handler_shader) - aco::select_trap_handler_shader(program.get(), shaders[0], &config, args); + aco::select_trap_handler_shader(program.get(), shaders[0], &config, options, info, args); else - aco::select_program(program.get(), shader_count, shaders, &config, args); - if (args->options->dump_preoptir) + aco::select_program(program.get(), shader_count, shaders, &config, options, info, args); + if (options->dump_preoptir) aco_print_program(program.get(), stderr); aco::live live_vars; @@ -107,7 +110,7 @@ aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders, validate(program.get()); /* Optimization */ - if 
(!args->options->key.optimisations_disabled) { + if (!options->key.optimisations_disabled) { if (!(aco::debug_flags & aco::DEBUG_NO_VN)) aco::value_numbering(program.get()); if (!(aco::debug_flags & aco::DEBUG_NO_OPT)) @@ -125,7 +128,7 @@ aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders, } std::string llvm_ir; - if (args->options->record_ir) { + if (options->record_ir) { char* data = NULL; size_t size = 0; u_memstream mem; @@ -143,11 +146,11 @@ aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders, if (program->collect_statistics) aco::collect_presched_stats(program.get()); - if ((aco::debug_flags & aco::DEBUG_LIVE_INFO) && args->options->dump_shader) + if ((aco::debug_flags & aco::DEBUG_LIVE_INFO) && options->dump_shader) aco_print_program(program.get(), stderr, live_vars, aco::print_live_vars | aco::print_kill); if (!args->is_trap_handler_shader) { - if (!args->options->key.optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_SCHED)) + if (!options->key.optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_SCHED)) aco::schedule_program(program.get(), live_vars); validate(program.get()); @@ -157,14 +160,14 @@ aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders, if (aco::validate_ra(program.get())) { aco_print_program(program.get(), stderr); abort(); - } else if (args->options->dump_shader) { + } else if (options->dump_shader) { aco_print_program(program.get(), stderr); } validate(program.get()); /* Optimization */ - if (!args->options->key.optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_OPT)) { + if (!options->key.optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_OPT)) { aco::optimize_postRA(program.get()); validate(program.get()); } @@ -192,7 +195,7 @@ aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders, if (program->collect_statistics) aco::collect_postasm_stats(program.get(), code); - bool get_disasm = args->options->dump_shader || args->options->record_ir; + bool get_disasm = options->dump_shader || options->record_ir; size_t size = llvm_ir.size(); @@ -266,8 +269,11 @@ aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders, } void -aco_compile_vs_prolog(const struct radv_vs_prolog_key* key, struct radv_prolog_binary** binary, - const struct radv_shader_args* args) +aco_compile_vs_prolog(const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, + const struct radv_vs_prolog_key* key, + const struct radv_shader_args* args, + struct radv_prolog_binary** binary) { aco::init(); @@ -280,10 +286,10 @@ aco_compile_vs_prolog(const struct radv_vs_prolog_key* key, struct radv_prolog_b /* create IR */ unsigned num_preserved_sgprs; - aco::select_vs_prolog(program.get(), key, &config, args, &num_preserved_sgprs); + aco::select_vs_prolog(program.get(), key, &config, options, info, args, &num_preserved_sgprs); aco::insert_NOPs(program.get()); - if (args->options->dump_shader) + if (options->dump_shader) aco_print_program(program.get(), stderr); /* assembly */ @@ -291,7 +297,7 @@ aco_compile_vs_prolog(const struct radv_vs_prolog_key* key, struct radv_prolog_b code.reserve(align(program->blocks[0].instructions.size() * 2, 16)); unsigned exec_size = aco::emit_program(program.get(), code); - if (args->options->dump_shader) { + if (options->dump_shader) { aco::print_asm(program.get(), code, exec_size / 4u, stderr); fprintf(stderr, "\n"); } diff --git a/src/amd/compiler/aco_interface.h b/src/amd/compiler/aco_interface.h 
index 33d2762..b0b5c74 100644 --- a/src/amd/compiler/aco_interface.h +++ b/src/amd/compiler/aco_interface.h @@ -41,11 +41,17 @@ struct aco_compiler_statistic_info { extern const unsigned aco_num_statistics; extern const struct aco_compiler_statistic_info* aco_statistic_infos; -void aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders, - struct radv_shader_binary** binary, const struct radv_shader_args* args); - -void aco_compile_vs_prolog(const struct radv_vs_prolog_key* key, struct radv_prolog_binary** binary, - const struct radv_shader_args* args); +void aco_compile_shader(const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, + unsigned shader_count, struct nir_shader* const* shaders, + const struct radv_shader_args *args, + struct radv_shader_binary** binary); + +void aco_compile_vs_prolog(const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, + const struct radv_vs_prolog_key* key, + const struct radv_shader_args* args, + struct radv_prolog_binary** binary); #ifdef __cplusplus } diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 8de4f45..efab59f 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2143,13 +2143,23 @@ void init_program(Program* program, Stage stage, const struct radv_shader_info* ac_shader_config* config); void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders, - ac_shader_config* config, const struct radv_shader_args* args); + ac_shader_config* config, const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, + const struct radv_shader_args* args); void select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_config* config, + const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, const struct radv_shader_args* args); void select_trap_handler_shader(Program* program, struct nir_shader* shader, - ac_shader_config* config, const struct radv_shader_args* args); + ac_shader_config* config, + const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, + const struct radv_shader_args* args); void select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, - ac_shader_config* config, const struct radv_shader_args* args, + ac_shader_config* config, + const struct radv_nir_compiler_options* options, + const struct radv_shader_info* info, + const struct radv_shader_args* args, unsigned* num_preserved_sgprs); void lower_phis(Program* program); diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 617a791..46d3613 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -44,6 +44,8 @@ struct radv_shader_context { struct ac_llvm_context ac; const struct nir_shader *shader; struct ac_shader_abi abi; + const struct radv_nir_compiler_options *options; + struct radv_shader_info *shader_info; const struct radv_shader_args *args; gl_shader_stage stage; @@ -111,8 +113,8 @@ create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuil static void load_descriptor_sets(struct radv_shader_context *ctx) { - struct radv_userdata_locations *user_sgprs_locs = &ctx->args->shader_info->user_sgprs_locs; - uint32_t mask = ctx->args->shader_info->desc_set_used_mask; + struct radv_userdata_locations *user_sgprs_locs = &ctx->shader_info->user_sgprs_locs; + uint32_t mask = ctx->shader_info->desc_set_used_mask; if 
(user_sgprs_locs->shader_data[AC_UD_INDIRECT_DESCRIPTOR_SETS].sgpr_idx != -1) { LLVMValueRef desc_sets = ac_get_arg(&ctx->ac, ctx->args->descriptor_sets[0]); @@ -168,7 +170,7 @@ static void create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has_previous_stage) { if (ctx->ac.chip_class >= GFX10) { - if (is_pre_gs_stage(stage) && ctx->args->shader_info->is_ngg) { + if (is_pre_gs_stage(stage) && ctx->shader_info->is_ngg) { /* On GFX10, VS is merged into GS for NGG. */ stage = MESA_SHADER_GEOMETRY; has_previous_stage = true; @@ -178,7 +180,7 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has ctx->main_function = create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac, get_llvm_calling_convention(ctx->main_function, stage), - ctx->max_workgroup_size, ctx->args->options); + ctx->max_workgroup_size, ctx->options); ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST), NULL, 0, @@ -189,7 +191,7 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has load_descriptor_sets(ctx); if (stage == MESA_SHADER_TESS_CTRL || - (stage == MESA_SHADER_VERTEX && ctx->args->shader_info->vs.as_ls) || + (stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_ls) || /* GFX9 has the ESGS ring buffer in LDS. */ (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) { ac_declare_lds_as_pointer(&ctx->ac); @@ -202,7 +204,7 @@ radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index, unsigned desc_ { struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; - struct radv_pipeline_layout *pipeline_layout = ctx->args->options->layout; + struct radv_pipeline_layout *pipeline_layout = ctx->options->layout; struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout; unsigned base_offset = layout->binding[binding].offset; LLVMValueRef offset, stride; @@ -261,7 +263,7 @@ load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id) ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ac_array_in_const_addr_space(ctx->ac.v2f32), ""); - uint32_t sample_pos_offset = radv_get_sample_pos_offset(ctx->args->options->key.ps.num_samples); + uint32_t sample_pos_offset = radv_get_sample_pos_offset(ctx->options->key.ps.num_samples); sample_id = LLVMBuildAdd(ctx->ac.builder, sample_id, LLVMConstInt(ctx->ac.i32, sample_pos_offset, false), ""); @@ -276,10 +278,10 @@ load_sample_mask_in(struct ac_shader_abi *abi) struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); uint8_t log2_ps_iter_samples; - if (ctx->args->shader_info->ps.uses_sample_shading) { - log2_ps_iter_samples = util_logbase2(ctx->args->options->key.ps.num_samples); + if (ctx->shader_info->ps.uses_sample_shading) { + log2_ps_iter_samples = util_logbase2(ctx->options->key.ps.num_samples); } else { - log2_ps_iter_samples = ctx->args->options->key.ps.log2_ps_iter_samples; + log2_ps_iter_samples = ctx->options->key.ps.log2_ps_iter_samples; } LLVMValueRef result, sample_id; @@ -306,14 +308,14 @@ visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMV unsigned offset = 0; struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - if (ctx->args->shader_info->is_ngg) { + if (ctx->shader_info->is_ngg) { gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs); return; } for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - unsigned output_usage_mask = 
ctx->args->shader_info->gs.output_usage_mask[i]; - uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i]; + unsigned output_usage_mask = ctx->shader_info->gs.output_usage_mask[i]; + uint8_t output_stream = ctx->shader_info->gs.output_streams[i]; LLVMValueRef *out_ptr = &addrs[i * 4]; int length = util_last_bit(output_usage_mask); @@ -351,7 +353,7 @@ visit_end_primitive(struct ac_shader_abi *abi, unsigned stream) { struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - if (ctx->args->shader_info->is_ngg) { + if (ctx->shader_info->is_ngg) { LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]); return; } @@ -406,7 +408,7 @@ get_desc_ptr(struct radv_shader_context *ctx, LLVMValueRef ptr, bool non_uniform if (non_uniform) { /* 32-bit seems to always use SMEM. addrspacecast from 32-bit -> 64-bit is broken. */ LLVMValueRef dwords[] = {ptr, - LLVMConstInt(ctx->ac.i32, ctx->args->options->address32_hi, false)}; + LLVMConstInt(ctx->ac.i32, ctx->options->address32_hi, false)}; ptr = ac_build_gather_values(&ctx->ac, dwords, 2); ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, ""); addr_space = AC_ADDR_SPACE_CONST; @@ -439,7 +441,7 @@ radv_load_ubo(struct ac_shader_abi *abi, unsigned desc_set, unsigned binding, bo LLVMValueRef result; if (valid_binding) { - struct radv_pipeline_layout *pipeline_layout = ctx->args->options->layout; + struct radv_pipeline_layout *pipeline_layout = ctx->options->layout; struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout; if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) { @@ -461,7 +463,7 @@ radv_load_ubo(struct ac_shader_abi *abi, unsigned desc_set, unsigned binding, bo LLVMValueRef desc_components[4] = { LLVMBuildPtrToInt(ctx->ac.builder, buffer_ptr, ctx->ac.intptr, ""), - LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->args->options->address32_hi), + LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->options->address32_hi), false), LLVMConstInt(ctx->ac.i32, 0xffffffff, false), LLVMConstInt(ctx->ac.i32, desc_type, false), @@ -489,7 +491,7 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsign struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); LLVMValueRef list = ctx->descriptor_sets[descriptor_set]; struct radv_descriptor_set_layout *layout = - ctx->args->options->layout->set[descriptor_set].layout; + ctx->options->layout->set[descriptor_set].layout; struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index; unsigned offset = binding->offset; unsigned stride = binding->size; @@ -584,7 +586,7 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsign components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i); descriptor = ac_build_gather_values(&ctx->ac, components, 8); } else if (desc_type == AC_DESC_IMAGE && - ctx->args->options->has_image_load_dcc_bug && + ctx->options->has_image_load_dcc_bug && image && !write) { LLVMValueRef components[8]; @@ -684,17 +686,17 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp LLVMValueRef input; LLVMValueRef buffer_index; unsigned attrib_index = driver_location - VERT_ATTRIB_GENERIC0; - unsigned attrib_format = ctx->args->options->key.vs.vertex_attribute_formats[attrib_index]; + unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[attrib_index]; unsigned data_format = attrib_format & 0x0f; unsigned num_format = (attrib_format >> 4) & 0x07; bool 
is_float = num_format != V_008F0C_BUF_NUM_FORMAT_UINT && num_format != V_008F0C_BUF_NUM_FORMAT_SINT; uint8_t input_usage_mask = - ctx->args->shader_info->vs.input_usage_mask[driver_location]; + ctx->shader_info->vs.input_usage_mask[driver_location]; unsigned num_input_channels = util_last_bit(input_usage_mask); - if (ctx->args->options->key.vs.instance_rate_inputs & (1u << attrib_index)) { - uint32_t divisor = ctx->args->options->key.vs.instance_rate_divisors[attrib_index]; + if (ctx->options->key.vs.instance_rate_inputs & (1u << attrib_index)) { + uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index]; if (divisor) { buffer_index = ctx->abi.instance_id; @@ -718,19 +720,19 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp /* Adjust the number of channels to load based on the vertex attribute format. */ unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels); - unsigned attrib_binding = ctx->args->options->key.vs.vertex_attribute_bindings[attrib_index]; - unsigned attrib_offset = ctx->args->options->key.vs.vertex_attribute_offsets[attrib_index]; - unsigned attrib_stride = ctx->args->options->key.vs.vertex_attribute_strides[attrib_index]; - unsigned alpha_adjust = ctx->args->options->key.vs.vertex_alpha_adjust[attrib_index]; + unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[attrib_index]; + unsigned attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[attrib_index]; + unsigned attrib_stride = ctx->options->key.vs.vertex_attribute_strides[attrib_index]; + unsigned alpha_adjust = ctx->options->key.vs.vertex_alpha_adjust[attrib_index]; - if (ctx->args->options->key.vs.vertex_post_shuffle & (1 << attrib_index)) { + if (ctx->options->key.vs.vertex_post_shuffle & (1 << attrib_index)) { /* Always load, at least, 3 channels for formats that need to be shuffled because X<->Z. */ num_channels = MAX2(num_channels, 3); } unsigned desc_index = - ctx->args->shader_info->vs.use_per_attribute_vb_descs ? attrib_index : attrib_binding; - desc_index = util_bitcount(ctx->args->shader_info->vs.vb_desc_usage_mask & + ctx->shader_info->vs.use_per_attribute_vb_descs ? 
attrib_index : attrib_binding; + desc_index = util_bitcount(ctx->shader_info->vs.vb_desc_usage_mask & u_bit_consecutive(0, desc_index)); t_offset = LLVMConstInt(ctx->ac.i32, desc_index, false); t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset); @@ -780,7 +782,7 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp ctx->ac.i32_0, ctx->ac.i32_0, num_channels, data_format, num_format, 0, true); } - if (ctx->args->options->key.vs.vertex_post_shuffle & (1 << attrib_index)) { + if (ctx->options->key.vs.vertex_post_shuffle & (1 << attrib_index)) { LLVMValueRef c[4]; c[0] = ac_llvm_extract_elem(&ctx->ac, input, 2); c[1] = ac_llvm_extract_elem(&ctx->ac, input, 1); @@ -904,9 +906,9 @@ si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values, bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2; if (ctx->stage == MESA_SHADER_FRAGMENT) { unsigned index = target - V_008DFC_SQ_EXP_MRT; - unsigned col_format = (ctx->args->options->key.ps.col_format >> (4 * index)) & 0xf; - bool is_int8 = (ctx->args->options->key.ps.is_int8 >> index) & 1; - bool is_int10 = (ctx->args->options->key.ps.is_int10 >> index) & 1; + unsigned col_format = (ctx->options->key.ps.col_format >> (4 * index)) & 0xf; + bool is_int8 = (ctx->options->key.ps.is_int8 >> index) & 1; + bool is_int10 = (ctx->options->key.ps.is_int10 >> index) & 1; LLVMValueRef (*packf)(struct ac_llvm_context * ctx, LLVMValueRef args[2]) = NULL; LLVMValueRef (*packi)(struct ac_llvm_context * ctx, LLVMValueRef args[2], unsigned bits, @@ -989,7 +991,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values, /* Replace NaN by zero (only 32-bit) to fix game bugs if * requested. */ - if (ctx->args->options->enable_mrt_output_nan_fixup && !is_16bit && + if (ctx->options->enable_mrt_output_nan_fixup && !is_16bit && (col_format == V_028714_SPI_SHADER_32_R || col_format == V_028714_SPI_SHADER_32_GR || col_format == V_028714_SPI_SHADER_32_AR || col_format == V_028714_SPI_SHADER_32_ABGR || col_format == V_028714_SPI_SHADER_FP16_ABGR)) { @@ -1145,7 +1147,7 @@ radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream) LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->args->streamout_buffers); for (i = 0; i < 4; i++) { - uint16_t stride = ctx->args->shader_info->so.strides[i]; + uint16_t stride = ctx->shader_info->so.strides[i]; if (!stride) continue; @@ -1164,9 +1166,9 @@ radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream) } /* Write streamout data. 
*/ - for (i = 0; i < ctx->args->shader_info->so.num_outputs; i++) { + for (i = 0; i < ctx->shader_info->so.num_outputs; i++) { struct radv_shader_output_values shader_out = {0}; - struct radv_stream_output *output = &ctx->args->shader_info->so.outputs[i]; + struct radv_stream_output *output = &ctx->shader_info->so.outputs[i]; if (stream != output->stream) continue; @@ -1260,7 +1262,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v } bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate || - ctx->args->options->force_vrs_rates; + ctx->options->force_vrs_rates; if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_layer || outinfo->writes_viewport_index || writes_primitive_shading_rate) { @@ -1281,7 +1283,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v if (outinfo->writes_layer == true) pos_args[1].out[2] = layer_value; if (outinfo->writes_viewport_index == true) { - if (ctx->args->options->chip_class >= GFX9) { + if (ctx->options->chip_class >= GFX9) { /* GFX9 has the layer in out.z[10:0] and the viewport * index in out.z[19:16]. */ @@ -1300,7 +1302,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v if (outinfo->writes_primitive_shading_rate) { pos_args[1].out[1] = primitive_shading_rate; - } else if (ctx->args->options->force_vrs_rates) { + } else if (ctx->options->force_vrs_rates) { /* Bits [2:3] = VRS rate X * Bits [4:5] = VRS rate Y * @@ -1312,7 +1314,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v * * Sample shading can't go above 8 samples, so both numbers can't be -2 at the same time. */ - LLVMValueRef rates = LLVMConstInt(ctx->ac.i32, ctx->args->options->force_vrs_rates, false); + LLVMValueRef rates = LLVMConstInt(ctx->ac.i32, ctx->options->force_vrs_rates, false); LLVMValueRef cond; LLVMValueRef v; @@ -1356,7 +1358,7 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, bool export_prim_id, boo struct radv_shader_output_values *outputs; unsigned noutput = 0; - if (ctx->args->options->key.has_multiview_view_index) { + if (ctx->options->key.has_multiview_view_index) { LLVMValueRef *tmp_out = &ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]; if (!*tmp_out) { for (unsigned i = 0; i < 4; ++i) @@ -1369,7 +1371,7 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, bool export_prim_id, boo ctx->output_mask |= 1ull << VARYING_SLOT_LAYER; } - if (ctx->args->shader_info->so.num_outputs && !ctx->args->is_gs_copy_shader) { + if (ctx->shader_info->so.num_outputs && !ctx->args->is_gs_copy_shader) { /* The GS copy shader emission already emits streamout. 
*/ radv_emit_streamout(ctx, 0); } @@ -1386,12 +1388,12 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, bool export_prim_id, boo outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1; if (ctx->stage == MESA_SHADER_VERTEX && !ctx->args->is_gs_copy_shader) { - outputs[noutput].usage_mask = ctx->args->shader_info->vs.output_usage_mask[i]; + outputs[noutput].usage_mask = ctx->shader_info->vs.output_usage_mask[i]; } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { - outputs[noutput].usage_mask = ctx->args->shader_info->tes.output_usage_mask[i]; + outputs[noutput].usage_mask = ctx->shader_info->tes.output_usage_mask[i]; } else { assert(ctx->args->is_gs_copy_shader); - outputs[noutput].usage_mask = ctx->args->shader_info->gs.output_usage_mask[i]; + outputs[noutput].usage_mask = ctx->shader_info->gs.output_usage_mask[i]; } for (unsigned j = 0; j < 4; j++) { @@ -1463,7 +1465,7 @@ ngg_gs_get_vertex_storage(struct radv_shader_context *ctx) { unsigned num_outputs = util_bitcount64(ctx->output_mask); - if (ctx->args->options->key.has_multiview_view_index) + if (ctx->options->key.has_multiview_view_index) num_outputs++; LLVMTypeRef elements[2] = { @@ -1601,14 +1603,14 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx) /* Copy Primitive IDs from GS threads to the LDS address corresponding * to the ES thread of the provoking vertex. */ - if (ctx->stage == MESA_SHADER_VERTEX && ctx->args->shader_info->vs.outinfo.export_prim_id) { + if (ctx->stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.outinfo.export_prim_id) { ac_build_ifcc(&ctx->ac, is_gs_thread, 5400); LLVMValueRef provoking_vtx_in_prim = LLVMConstInt(ctx->ac.i32, 0, false); /* For provoking vertex last mode, use num_vtx_in_prim - 1. */ - if (ctx->args->options->key.vs.provoking_vtx_last) { - uint8_t outprim = si_conv_prim_to_gs_out(ctx->args->options->key.vs.topology); + if (ctx->options->key.vs.provoking_vtx_last) { + uint8_t outprim = si_conv_prim_to_gs_out(ctx->options->key.vs.topology); provoking_vtx_in_prim = LLVMConstInt(ctx->ac.i32, outprim, false); } @@ -1641,7 +1643,7 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx) { struct ac_ngg_prim prim = {0}; - if (ctx->args->shader_info->is_ngg_passthrough) { + if (ctx->shader_info->is_ngg_passthrough) { prim.passthrough = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]); } else { prim.num_vertices = num_vertices; @@ -1658,8 +1660,8 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx) ac_build_ifcc(&ctx->ac, is_es_thread, 6002); { struct radv_vs_output_info *outinfo = ctx->stage == MESA_SHADER_TESS_EVAL - ? &ctx->args->shader_info->tes.outinfo - : &ctx->args->shader_info->vs.outinfo; + ? &ctx->shader_info->tes.outinfo + : &ctx->shader_info->vs.outinfo; /* Exporting the primitive ID is handled below. 
*/ /* TODO: use the new VS export path */ @@ -1736,7 +1738,7 @@ gfx10_ngg_gs_emit_epilogue_1(struct radv_shader_context *ctx) for (unsigned stream = 0; stream < 4; ++stream) { unsigned num_components; - num_components = ctx->args->shader_info->gs.num_stream_output_components[stream]; + num_components = ctx->shader_info->gs.num_stream_output_components[stream]; if (!num_components) continue; @@ -1764,7 +1766,7 @@ gfx10_ngg_gs_emit_epilogue_1(struct radv_shader_context *ctx) for (unsigned stream = 0; stream < 4; ++stream) { unsigned num_components; - num_components = ctx->args->shader_info->gs.num_stream_output_components[stream]; + num_components = ctx->shader_info->gs.num_stream_output_components[stream]; if (!num_components) continue; @@ -1922,7 +1924,7 @@ gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx) is_odd = LLVMBuildTrunc(builder, is_odd, ctx->ac.i1, ""); LLVMValueRef flatshade_first = - LLVMConstInt(ctx->ac.i1, !ctx->args->options->key.vs.provoking_vtx_last, false); + LLVMConstInt(ctx->ac.i1, !ctx->options->key.vs.provoking_vtx_last, false); ac_build_triangle_strip_indices_to_triangle(&ctx->ac, is_odd, flatshade_first, prim.index); } @@ -1935,8 +1937,8 @@ gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx) tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, vertlive_scan.result_reduce, ""); ac_build_ifcc(&ctx->ac, tmp, 5145); { - struct radv_vs_output_info *outinfo = &ctx->args->shader_info->vs.outinfo; - bool export_view_index = ctx->args->options->key.has_multiview_view_index; + struct radv_vs_output_info *outinfo = &ctx->shader_info->vs.outinfo; + bool export_view_index = ctx->options->key.has_multiview_view_index; struct radv_shader_output_values *outputs; unsigned noutput = 0; @@ -1951,7 +1953,7 @@ gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx) unsigned out_idx = 0; for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i]; + unsigned output_usage_mask = ctx->shader_info->gs.output_usage_mask[i]; int length = util_last_bit(output_usage_mask); if (!(ctx->output_mask & (1ull << i))) @@ -2011,8 +2013,8 @@ gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, unsigned stream, LLVMV const LLVMValueRef vertexptr = ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx); unsigned out_idx = 0; for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i]; - uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i]; + unsigned output_usage_mask = ctx->shader_info->gs.output_usage_mask[i]; + uint8_t output_stream = ctx->shader_info->gs.output_streams[i]; LLVMValueRef *out_ptr = &addrs[i * 4]; int length = util_last_bit(output_usage_mask); @@ -2030,7 +2032,7 @@ gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, unsigned stream, LLVMV LLVMBuildStore(builder, out_val, ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx)); } } - assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size); + assert(out_idx * 4 <= ctx->shader_info->gs.gsvs_vertex_size); /* Store the current number of emitted vertices to zero out remaining * primitive flags in case the geometry shader doesn't emit the maximum @@ -2122,22 +2124,22 @@ handle_fs_outputs_post(struct radv_shader_context *ctx) } /* Process depth, stencil, samplemask. 
*/ - if (ctx->args->shader_info->ps.writes_z) { + if (ctx->shader_info->ps.writes_z) { depth = ac_to_float(&ctx->ac, radv_load_output(ctx, FRAG_RESULT_DEPTH, 0)); } - if (ctx->args->shader_info->ps.writes_stencil) { + if (ctx->shader_info->ps.writes_stencil) { stencil = ac_to_float(&ctx->ac, radv_load_output(ctx, FRAG_RESULT_STENCIL, 0)); } - if (ctx->args->shader_info->ps.writes_sample_mask) { + if (ctx->shader_info->ps.writes_sample_mask) { samplemask = ac_to_float(&ctx->ac, radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0)); } /* Set the DONE bit on last non-null color export only if Z isn't * exported. */ - if (index > 0 && !ctx->args->shader_info->ps.writes_z && - !ctx->args->shader_info->ps.writes_stencil && - !ctx->args->shader_info->ps.writes_sample_mask) { + if (index > 0 && !ctx->shader_info->ps.writes_z && + !ctx->shader_info->ps.writes_stencil && + !ctx->shader_info->ps.writes_sample_mask) { unsigned last = index - 1; color_args[last].valid_mask = 1; /* whether the EXEC mask is valid */ @@ -2157,7 +2159,7 @@ handle_fs_outputs_post(struct radv_shader_context *ctx) static void emit_gs_epilogue(struct radv_shader_context *ctx) { - if (ctx->args->shader_info->is_ngg) { + if (ctx->shader_info->is_ngg) { gfx10_ngg_gs_emit_epilogue_1(ctx); return; } @@ -2175,16 +2177,16 @@ handle_shader_outputs_post(struct ac_shader_abi *abi) switch (ctx->stage) { case MESA_SHADER_VERTEX: - if (ctx->args->shader_info->vs.as_ls) + if (ctx->shader_info->vs.as_ls) break; /* Lowered in NIR */ - else if (ctx->args->shader_info->vs.as_es) + else if (ctx->shader_info->vs.as_es) break; /* Lowered in NIR */ - else if (ctx->args->shader_info->is_ngg) + else if (ctx->shader_info->is_ngg) break; else - handle_vs_outputs_post(ctx, ctx->args->shader_info->vs.outinfo.export_prim_id, - ctx->args->shader_info->vs.outinfo.export_clip_dists, - &ctx->args->shader_info->vs.outinfo); + handle_vs_outputs_post(ctx, ctx->shader_info->vs.outinfo.export_prim_id, + ctx->shader_info->vs.outinfo.export_clip_dists, + &ctx->shader_info->vs.outinfo); break; case MESA_SHADER_FRAGMENT: handle_fs_outputs_post(ctx); @@ -2195,14 +2197,14 @@ handle_shader_outputs_post(struct ac_shader_abi *abi) case MESA_SHADER_TESS_CTRL: break; /* Lowered in NIR */ case MESA_SHADER_TESS_EVAL: - if (ctx->args->shader_info->tes.as_es) + if (ctx->shader_info->tes.as_es) break; /* Lowered in NIR */ - else if (ctx->args->shader_info->is_ngg) + else if (ctx->shader_info->is_ngg) break; else - handle_vs_outputs_post(ctx, ctx->args->shader_info->tes.outinfo.export_prim_id, - ctx->args->shader_info->tes.outinfo.export_clip_dists, - &ctx->args->shader_info->tes.outinfo); + handle_vs_outputs_post(ctx, ctx->shader_info->tes.outinfo.export_prim_id, + ctx->shader_info->tes.outinfo.export_clip_dists, + &ctx->shader_info->tes.outinfo); break; default: break; @@ -2210,8 +2212,7 @@ handle_shader_outputs_post(struct ac_shader_abi *abi) } static void -ac_llvm_finalize_module(struct radv_shader_context *ctx, LLVMPassManagerRef passmgr, - const struct radv_nir_compiler_options *options) +ac_llvm_finalize_module(struct radv_shader_context *ctx, LLVMPassManagerRef passmgr) { LLVMRunPassManager(passmgr, ctx->ac.module); LLVMDisposeBuilder(ctx->ac.builder); @@ -2231,15 +2232,15 @@ ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx) case MESA_SHADER_GEOMETRY: return; case MESA_SHADER_VERTEX: - if (ctx->args->shader_info->vs.as_ls || - ctx->args->shader_info->vs.as_es) + if (ctx->shader_info->vs.as_ls || + ctx->shader_info->vs.as_es) return; - outinfo = 
&ctx->args->shader_info->vs.outinfo; + outinfo = &ctx->shader_info->vs.outinfo; break; case MESA_SHADER_TESS_EVAL: - if (ctx->args->shader_info->tes.as_es) + if (ctx->shader_info->tes.as_es) return; - outinfo = &ctx->args->shader_info->tes.outinfo; + outinfo = &ctx->shader_info->tes.outinfo; break; default: unreachable("Unhandled shader type"); @@ -2252,10 +2253,10 @@ ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx) static void ac_setup_rings(struct radv_shader_context *ctx) { - if (ctx->args->options->chip_class <= GFX8 && + if (ctx->options->chip_class <= GFX8 && (ctx->stage == MESA_SHADER_GEOMETRY || - (ctx->stage == MESA_SHADER_VERTEX && ctx->args->shader_info->vs.as_es) || - (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->args->shader_info->tes.as_es))) { + (ctx->stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_es) || + (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->shader_info->tes.as_es))) { unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS; LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false); @@ -2288,7 +2289,7 @@ ac_setup_rings(struct radv_shader_context *ctx) unsigned num_components, stride; LLVMValueRef ring, tmp; - num_components = ctx->args->shader_info->gs.num_stream_output_components[stream]; + num_components = ctx->shader_info->gs.num_stream_output_components[stream]; if (!num_components) continue; @@ -2384,11 +2385,16 @@ declare_esgs_ring(struct radv_shader_context *ctx) } static LLVMModuleRef -ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *const *shaders, - int shader_count, const struct radv_shader_args *args) +ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, + const struct radv_nir_compiler_options *options, + struct radv_shader_info *info, + struct nir_shader *const *shaders, int shader_count, + const struct radv_shader_args *args) { struct radv_shader_context ctx = {0}; ctx.args = args; + ctx.options = options; + ctx.shader_info = info; enum ac_float_mode float_mode = AC_FLOAT_MODE_DEFAULT; @@ -2396,15 +2402,14 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO; } - ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, args->options->family, - args->options->info, float_mode, args->shader_info->wave_size, - args->shader_info->ballot_bit_size); + ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class, options->family, + options->info, float_mode, info->wave_size, info->ballot_bit_size); ctx.context = ctx.ac.context; - ctx.max_workgroup_size = args->shader_info->workgroup_size; + ctx.max_workgroup_size = info->workgroup_size; if (ctx.ac.chip_class >= GFX10) { - if (is_pre_gs_stage(shaders[0]->info.stage) && args->shader_info->is_ngg) { + if (is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg) { ctx.max_workgroup_size = 128; } } @@ -2421,10 +2426,10 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co ctx.abi.load_ring_tess_offchip = load_ring_tess_offchip; ctx.abi.load_ring_esgs = load_ring_esgs; ctx.abi.clamp_shadow_reference = false; - ctx.abi.adjust_frag_coord_z = args->options->adjust_frag_coord_z; - ctx.abi.robust_buffer_access = args->options->robust_buffer_access; + ctx.abi.adjust_frag_coord_z = options->adjust_frag_coord_z; + ctx.abi.robust_buffer_access = options->robust_buffer_access; - bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && args->shader_info->is_ngg; + bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && 
info->is_ngg; if (shader_count >= 2 || is_ngg) ac_init_exec_full_mask(&ctx.ac); @@ -2435,7 +2440,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co if (args->ac.instance_id.used) ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id); - if (args->options->has_ls_vgpr_init_bug && + if (options->has_ls_vgpr_init_bug && shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL) ac_nir_fixup_ls_hs_input_vgprs(&ctx); @@ -2447,7 +2452,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co * Add an extra dword per vertex to ensure an odd stride, which * avoids bank conflicts for SoA accesses. */ - if (!args->shader_info->is_ngg_passthrough) + if (!info->is_ngg_passthrough) declare_esgs_ring(&ctx); /* GFX10 hang workaround - there needs to be an s_barrier before gs_alloc_req always */ @@ -2464,7 +2469,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co for (int i = 0; i < 4; i++) { ctx.gs_next_vertex[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, ""); } - if (args->shader_info->is_ngg) { + if (info->is_ngg) { for (unsigned i = 0; i < 4; ++i) { ctx.gs_curprim_verts[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, ""); ctx.gs_generated_prims[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, ""); @@ -2492,17 +2497,15 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co ctx.abi.load_sample_mask_in = load_sample_mask_in; } - if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX && - args->shader_info->is_ngg && - args->shader_info->vs.outinfo.export_prim_id) { + if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX && info->is_ngg && + info->vs.outinfo.export_prim_id) { declare_esgs_ring(&ctx); } bool nested_barrier = false; if (shader_idx) { - if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && - args->shader_info->is_ngg) { + if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && info->is_ngg) { gfx10_ngg_gs_emit_prologue(&ctx); nested_barrier = false; } else { @@ -2565,25 +2568,24 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co /* This needs to be outside the if wrapping the shader body, as sometimes * the HW generates waves with 0 es/vs threads. 
*/ - if (is_pre_gs_stage(shaders[shader_idx]->info.stage) && - args->shader_info->is_ngg && shader_idx == shader_count - 1) { + if (is_pre_gs_stage(shaders[shader_idx]->info.stage) && info->is_ngg && + shader_idx == shader_count - 1) { handle_ngg_outputs_post_2(&ctx); - } else if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && - args->shader_info->is_ngg) { + } else if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && info->is_ngg) { gfx10_ngg_gs_emit_epilogue_2(&ctx); } } LLVMBuildRetVoid(ctx.ac.builder); - if (args->options->dump_preoptir) { + if (options->dump_preoptir) { fprintf(stderr, "%s LLVM IR:\n\n", - radv_get_shader_name(args->shader_info, shaders[shader_count - 1]->info.stage)); + radv_get_shader_name(info, shaders[shader_count - 1]->info.stage)); ac_dump_module(ctx.ac.module); fprintf(stderr, "\n"); } - ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options); + ac_llvm_finalize_module(&ctx, ac_llvm->passmgr); if (shader_count == 1) ac_nir_eliminate_const_vs_outputs(&ctx); @@ -2673,18 +2675,21 @@ ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_modu } static void -radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, struct radv_shader_binary **rbinary, +radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, + const struct radv_nir_compiler_options *options, + struct radv_shader_info *info, + struct radv_shader_binary **rbinary, const struct radv_shader_args *args, struct nir_shader *const *nir, int nir_count) { LLVMModuleRef llvm_module; - llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, args); + llvm_module = ac_translate_nir_to_llvm(ac_llvm, options, info, nir, nir_count, args); ac_compile_llvm_module(ac_llvm, llvm_module, rbinary, nir[nir_count - 1]->info.stage, - radv_get_shader_name(args->shader_info, nir[nir_count - 1]->info.stage), - args->options); + radv_get_shader_name(info, nir[nir_count - 1]->info.stage), + options); } static void @@ -2696,7 +2701,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx) LLVMValueRef stream_id; /* Fetch the vertex stream ID. 
*/ - if (ctx->args->shader_info->so.num_outputs) { + if (ctx->shader_info->so.num_outputs) { stream_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.streamout_config), 24, 2); } else { @@ -2710,14 +2715,14 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx) switch_inst = LLVMBuildSwitch(ctx->ac.builder, stream_id, end_bb, 4); for (unsigned stream = 0; stream < 4; stream++) { - unsigned num_components = ctx->args->shader_info->gs.num_stream_output_components[stream]; + unsigned num_components = ctx->shader_info->gs.num_stream_output_components[stream]; LLVMBasicBlockRef bb; unsigned offset; if (stream > 0 && !num_components) continue; - if (stream > 0 && !ctx->args->shader_info->so.num_outputs) + if (stream > 0 && !ctx->shader_info->so.num_outputs) continue; bb = LLVMInsertBasicBlockInContext(ctx->ac.context, end_bb, "out"); @@ -2726,8 +2731,8 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx) offset = 0; for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i]; - unsigned output_stream = ctx->args->shader_info->gs.output_streams[i]; + unsigned output_usage_mask = ctx->shader_info->gs.output_usage_mask[i]; + unsigned output_stream = ctx->shader_info->gs.output_streams[i]; int length = util_last_bit(output_usage_mask); if (!(ctx->output_mask & (1ull << i)) || output_stream != stream) @@ -2758,12 +2763,12 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx) } } - if (ctx->args->shader_info->so.num_outputs) + if (ctx->shader_info->so.num_outputs) radv_emit_streamout(ctx, stream); if (stream == 0) { - handle_vs_outputs_post(ctx, false, ctx->args->shader_info->vs.outinfo.export_clip_dists, - &ctx->args->shader_info->vs.outinfo); + handle_vs_outputs_post(ctx, false, ctx->shader_info->vs.outinfo.export_clip_dists, + &ctx->shader_info->vs.outinfo); } LLVMBuildBr(ctx->ac.builder, end_bb); @@ -2773,17 +2778,22 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx) } static void -radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, struct nir_shader *geom_shader, +radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, + const struct radv_nir_compiler_options *options, + struct radv_shader_info *info, + struct nir_shader *geom_shader, struct radv_shader_binary **rbinary, const struct radv_shader_args *args) { struct radv_shader_context ctx = {0}; ctx.args = args; + ctx.options = options; + ctx.shader_info = info; assert(args->is_gs_copy_shader); - ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, args->options->family, - args->options->info, AC_FLOAT_MODE_DEFAULT, 64, 64); + ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class, options->family, + options->info, AC_FLOAT_MODE_DEFAULT, 64, 64); ctx.context = ctx.ac.context; ctx.stage = MESA_SHADER_VERTEX; @@ -2803,31 +2813,31 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, struct nir_shader LLVMBuildRetVoid(ctx.ac.builder); - ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options); + ac_llvm_finalize_module(&ctx, ac_llvm->passmgr); ac_compile_llvm_module(ac_llvm, ctx.ac.module, rbinary, MESA_SHADER_VERTEX, "GS Copy Shader", - args->options); + options); (*rbinary)->is_gs_copy_shader = true; } void -llvm_compile_shader(struct radv_device *device, unsigned shader_count, +llvm_compile_shader(const struct radv_nir_compiler_options *options, + struct radv_shader_info *info, unsigned shader_count, struct nir_shader *const *shaders, struct radv_shader_binary **binary, - struct 
radv_shader_args *args) + const struct radv_shader_args *args) { enum ac_target_machine_options tm_options = 0; struct ac_llvm_compiler ac_llvm; tm_options |= AC_TM_SUPPORTS_SPILL; - if (args->options->check_ir) + if (options->check_ir) tm_options |= AC_TM_CHECK_IR; - radv_init_llvm_compiler(&ac_llvm, args->options->family, tm_options, - args->shader_info->wave_size); + radv_init_llvm_compiler(&ac_llvm, options->family, tm_options, info->wave_size); if (args->is_gs_copy_shader) { - radv_compile_gs_copy_shader(&ac_llvm, *shaders, binary, args); + radv_compile_gs_copy_shader(&ac_llvm, options, info, *shaders, binary, args); } else { - radv_compile_nir_shader(&ac_llvm, binary, args, shaders, shader_count); + radv_compile_nir_shader(&ac_llvm, options, info, binary, args, shaders, shader_count); } } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 5d85260..896a767 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -2621,10 +2621,13 @@ struct radv_fence { /* radv_nir_to_llvm.c */ struct radv_shader_args; +struct radv_nir_compiler_options; +struct radv_shader_info; -void llvm_compile_shader(struct radv_device *device, unsigned shader_count, +void llvm_compile_shader(const struct radv_nir_compiler_options *options, + struct radv_shader_info *info, unsigned shader_count, struct nir_shader *const *shaders, struct radv_shader_binary **binary, - struct radv_shader_args *args); + const struct radv_shader_args *args); /* radv_shader_info.h */ struct radv_shader_info; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index e610b92..41554fa 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1782,27 +1782,25 @@ shader_variant_compile(struct radv_device *device, struct vk_shader_module *modu } struct radv_shader_args args = {0}; - args.options = options; - args.shader_info = info; args.is_gs_copy_shader = gs_copy_shader; args.is_trap_handler_shader = trap_handler_shader; - radv_declare_shader_args( - &args, gs_copy_shader ? MESA_SHADER_VERTEX : shaders[shader_count - 1]->info.stage, + radv_declare_shader_args(options, info, + gs_copy_shader ? MESA_SHADER_VERTEX : shaders[shader_count - 1]->info.stage, shader_count >= 2, - shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX); + shader_count >= 2 ? 
shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX, &args); #ifdef LLVM_AVAILABLE if (radv_use_llvm_for_stage(device, stage) || options->dump_shader || options->record_ir) ac_init_llvm_once(); if (radv_use_llvm_for_stage(device, stage)) { - llvm_compile_shader(device, shader_count, shaders, &binary, &args); + llvm_compile_shader(options, info, shader_count, shaders, &binary, &args); #else if (false) { #endif } else { - aco_compile_shader(shader_count, shaders, &binary, &args); + aco_compile_shader(options, info, shader_count, shaders, &args, &binary); } binary->info = *info; @@ -1962,10 +1960,8 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke info.is_ngg = key->is_ngg; struct radv_shader_args args = {0}; - args.options = &options; - args.shader_info = &info; - radv_declare_shader_args(&args, key->next_stage, key->next_stage != MESA_SHADER_VERTEX, - MESA_SHADER_VERTEX); + radv_declare_shader_args(&options, &info, key->next_stage, key->next_stage != MESA_SHADER_VERTEX, + MESA_SHADER_VERTEX, &args); #ifdef LLVM_AVAILABLE if (options.dump_shader) @@ -1973,7 +1969,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke #endif struct radv_prolog_binary *binary = NULL; - aco_compile_vs_prolog(key, &binary, &args); + aco_compile_vs_prolog(&options, &info, key, &args, &binary); struct radv_shader_prolog *prolog = upload_vs_prolog(device, binary, info.wave_size); free(binary); diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index e2f1b4a..18efc9f 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -39,26 +39,26 @@ set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs } static void -set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs) +set_loc_shader(struct radv_shader_info *info, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs) { - struct radv_userdata_info *ud_info = &args->shader_info->user_sgprs_locs.shader_data[idx]; + struct radv_userdata_info *ud_info = &info->user_sgprs_locs.shader_data[idx]; assert(ud_info); set_loc(ud_info, sgpr_idx, num_sgprs); } static void -set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) +set_loc_shader_ptr(struct radv_shader_info*info, int idx, uint8_t *sgpr_idx) { bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS; - set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); + set_loc_shader(info, idx, sgpr_idx, use_32bit_pointers ? 
1 : 2); } static void -set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx) +set_loc_desc(struct radv_shader_info *info, int idx, uint8_t *sgpr_idx) { - struct radv_userdata_locations *locs = &args->shader_info->user_sgprs_locs; + struct radv_userdata_locations *locs = &info->user_sgprs_locs; struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx]; assert(ud_info); @@ -75,27 +75,28 @@ struct user_sgpr_info { }; static bool -needs_view_index_sgpr(struct radv_shader_args *args, gl_shader_stage stage) +needs_view_index_sgpr(const struct radv_nir_compiler_options *options, + const struct radv_shader_info *info, gl_shader_stage stage) { switch (stage) { case MESA_SHADER_VERTEX: - if (args->shader_info->uses_view_index || - (!args->shader_info->vs.as_es && !args->shader_info->vs.as_ls && - args->options->key.has_multiview_view_index)) + if (info->uses_view_index || + (!info->vs.as_es && !info->vs.as_ls && + options->key.has_multiview_view_index)) return true; break; case MESA_SHADER_TESS_EVAL: - if (args->shader_info->uses_view_index || - (!args->shader_info->tes.as_es && args->options->key.has_multiview_view_index)) + if (info->uses_view_index || + (!info->tes.as_es && options->key.has_multiview_view_index)) return true; break; case MESA_SHADER_TESS_CTRL: - if (args->shader_info->uses_view_index) + if (info->uses_view_index) return true; break; case MESA_SHADER_GEOMETRY: - if (args->shader_info->uses_view_index || - (args->shader_info->is_ngg && args->options->key.has_multiview_view_index)) + if (info->uses_view_index || + (info->is_ngg && options->key.has_multiview_view_index)) return true; break; default: @@ -105,52 +106,53 @@ needs_view_index_sgpr(struct radv_shader_args *args, gl_shader_stage stage) } static uint8_t -count_vs_user_sgprs(struct radv_shader_args *args) +count_vs_user_sgprs(const struct radv_shader_info *info) { uint8_t count = 1; /* vertex offset */ - if (args->shader_info->vs.vb_desc_usage_mask) + if (info->vs.vb_desc_usage_mask) count++; - if (args->shader_info->vs.needs_draw_id) + if (info->vs.needs_draw_id) count++; - if (args->shader_info->vs.needs_base_instance) + if (info->vs.needs_base_instance) count++; return count; } static unsigned -count_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs) +count_ngg_sgprs(const struct radv_shader_info *info, bool has_api_gs) { unsigned count = 0; if (has_api_gs) count += 1; /* ngg_gs_state */ - if (args->shader_info->has_ngg_culling) + if (info->has_ngg_culling) count += 5; /* ngg_culling_settings + 4x ngg_viewport_* */ return count; } static void -allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info) +allocate_inline_push_consts(const struct radv_shader_info *info, + struct user_sgpr_info *user_sgpr_info) { uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs; /* Only supported if shaders use push constants. */ - if (args->shader_info->min_push_constant_used == UINT8_MAX) + if (info->min_push_constant_used == UINT8_MAX) return; /* Only supported if shaders don't have indirect push constants. */ - if (args->shader_info->has_indirect_push_constants) + if (info->has_indirect_push_constants) return; /* Only supported for 32-bit push constants. 
*/ - if (!args->shader_info->has_only_32bit_push_constants) + if (!info->has_only_32bit_push_constants) return; uint8_t num_push_consts = - (args->shader_info->max_push_constant_used - args->shader_info->min_push_constant_used) / 4; + (info->max_push_constant_used - info->min_push_constant_used) / 4; /* Check if the number of user SGPRs is large enough. */ if (num_push_consts < remaining_sgprs) { @@ -164,7 +166,7 @@ allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info user_sgpr_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS; if (user_sgpr_info->num_inline_push_consts == num_push_consts && - !args->shader_info->loads_dynamic_offsets) { + !info->loads_dynamic_offsets) { /* Disable the default push constants path if all constants are * inlined and if shaders don't use dynamic descriptors. */ @@ -173,9 +175,10 @@ allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info } static void -allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage, - gl_shader_stage previous_stage, bool needs_view_index, bool has_api_gs, - struct user_sgpr_info *user_sgpr_info) +allocate_user_sgprs(const struct radv_nir_compiler_options *options, + const struct radv_shader_info *info, gl_shader_stage stage, + bool has_previous_stage, gl_shader_stage previous_stage, bool needs_view_index, + bool has_api_gs, bool is_gs_copy_shader, struct user_sgpr_info *user_sgpr_info) { uint8_t user_sgpr_count = 0; @@ -185,39 +188,39 @@ allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool h user_sgpr_count += 2; /* prolog inputs */ - if (args->shader_info->vs.has_prolog) + if (info->vs.has_prolog) user_sgpr_count += 2; switch (stage) { case MESA_SHADER_COMPUTE: - if (args->shader_info->cs.uses_sbt) + if (info->cs.uses_sbt) user_sgpr_count += 1; - if (args->shader_info->cs.uses_grid_size) + if (info->cs.uses_grid_size) user_sgpr_count += 3; - if (args->shader_info->cs.uses_ray_launch_size) + if (info->cs.uses_ray_launch_size) user_sgpr_count += 3; break; case MESA_SHADER_FRAGMENT: break; case MESA_SHADER_VERTEX: - if (!args->is_gs_copy_shader) - user_sgpr_count += count_vs_user_sgprs(args); + if (!is_gs_copy_shader) + user_sgpr_count += count_vs_user_sgprs(info); break; case MESA_SHADER_TESS_CTRL: if (has_previous_stage) { if (previous_stage == MESA_SHADER_VERTEX) - user_sgpr_count += count_vs_user_sgprs(args); + user_sgpr_count += count_vs_user_sgprs(info); } break; case MESA_SHADER_TESS_EVAL: break; case MESA_SHADER_GEOMETRY: if (has_previous_stage) { - if (args->shader_info->is_ngg) - user_sgpr_count += count_ngg_sgprs(args, has_api_gs); + if (info->is_ngg) + user_sgpr_count += count_ngg_sgprs(info, has_api_gs); if (previous_stage == MESA_SHADER_VERTEX) { - user_sgpr_count += count_vs_user_sgprs(args); + user_sgpr_count += count_vs_user_sgprs(info); } } break; @@ -228,16 +231,16 @@ allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool h if (needs_view_index) user_sgpr_count++; - if (args->shader_info->loads_push_constants) + if (info->loads_push_constants) user_sgpr_count++; - if (args->shader_info->so.num_outputs) + if (info->so.num_outputs) user_sgpr_count++; uint32_t available_sgprs = - args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16; + options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 
32 : 16; uint32_t remaining_sgprs = available_sgprs - user_sgpr_count; - uint32_t num_desc_set = util_bitcount(args->shader_info->desc_set_used_mask); + uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask); if (remaining_sgprs < num_desc_set) { user_sgpr_info->indirect_all_descriptor_sets = true; @@ -246,16 +249,17 @@ allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool h user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set; } - allocate_inline_push_consts(args, user_sgpr_info); + allocate_inline_push_consts(info, user_sgpr_info); } static void -declare_global_input_sgprs(struct radv_shader_args *args, - const struct user_sgpr_info *user_sgpr_info) +declare_global_input_sgprs(const struct radv_shader_info *info, + const struct user_sgpr_info *user_sgpr_info, + struct radv_shader_args *args) { /* 1 for each descriptor set */ if (!user_sgpr_info->indirect_all_descriptor_sets) { - uint32_t mask = args->shader_info->desc_set_used_mask; + uint32_t mask = info->desc_set_used_mask; while (mask) { int i = u_bit_scan(&mask); @@ -266,7 +270,7 @@ declare_global_input_sgprs(struct radv_shader_args *args, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0]); } - if (args->shader_info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) { + if (info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) { /* 1 for push constants and dynamic descriptors */ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants); } @@ -274,43 +278,45 @@ declare_global_input_sgprs(struct radv_shader_args *args, for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]); } - args->ac.base_inline_push_consts = args->shader_info->min_push_constant_used / 4; + args->ac.base_inline_push_consts = info->min_push_constant_used / 4; - if (args->shader_info->so.num_outputs) { + if (info->so.num_outputs) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers); } } static void -declare_vs_specific_input_sgprs(struct radv_shader_args *args, gl_shader_stage stage, - bool has_previous_stage, gl_shader_stage previous_stage) +declare_vs_specific_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, + gl_shader_stage stage, bool has_previous_stage, + gl_shader_stage previous_stage) { - if (args->shader_info->vs.has_prolog) + if (info->vs.has_prolog) ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->prolog_inputs); if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { - if (args->shader_info->vs.vb_desc_usage_mask) { + if (info->vs.vb_desc_usage_mask) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers); } ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex); - if (args->shader_info->vs.needs_draw_id) { + if (info->vs.needs_draw_id) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id); } - if (args->shader_info->vs.needs_base_instance) { + if (info->vs.needs_base_instance) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance); } } } static void -declare_vs_input_vgprs(struct radv_shader_args *args) +declare_vs_input_vgprs(const struct radv_nir_compiler_options *options, + const struct radv_shader_info *info, struct radv_shader_args *args) { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, 
AC_ARG_INT, &args->ac.vertex_id); if (!args->is_gs_copy_shader) { - if (args->shader_info->vs.as_ls) { + if (info->vs.as_ls) { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id); - if (args->options->chip_class >= GFX10) { + if (options->chip_class >= GFX10) { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); } else { @@ -318,8 +324,8 @@ declare_vs_input_vgprs(struct radv_shader_args *args) ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */ } } else { - if (args->options->chip_class >= GFX10) { - if (args->shader_info->is_ngg) { + if (options->chip_class >= GFX10) { + if (info->is_ngg) { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id); @@ -336,9 +342,9 @@ declare_vs_input_vgprs(struct radv_shader_args *args) } } - if (args->shader_info->vs.dynamic_inputs) { - assert(args->shader_info->vs.use_per_attribute_vb_descs); - unsigned num_attributes = util_last_bit(args->shader_info->vs.vb_desc_usage_mask); + if (info->vs.dynamic_inputs) { + assert(info->vs.use_per_attribute_vb_descs); + unsigned num_attributes = util_last_bit(info->vs.vb_desc_usage_mask); for (unsigned i = 0; i < num_attributes; i++) ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]); /* Ensure the main shader doesn't use less vgprs than the prolog. The prolog requires one @@ -349,12 +355,13 @@ declare_vs_input_vgprs(struct radv_shader_args *args) } static void -declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage) +declare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, + gl_shader_stage stage) { int i; /* Streamout SGPRs. */ - if (args->shader_info->so.num_outputs) { + if (info->so.num_outputs) { assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config); @@ -365,7 +372,7 @@ declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage) /* A streamout buffer offset is loaded if the stride is non-zero. */ for (i = 0; i < 4; i++) { - if (!args->shader_info->so.strides[i]) + if (!info->so.strides[i]) continue; ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]); @@ -382,9 +389,10 @@ declare_tes_input_vgprs(struct radv_shader_args *args) } static void -declare_ps_input_vgprs(struct radv_shader_args *args) +declare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_args *args, + bool remap_spi_ps_input) { - unsigned spi_ps_input = args->shader_info->ps.spi_ps_input; + unsigned spi_ps_input = info->ps.spi_ps_input; ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample); ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center); @@ -403,7 +411,7 @@ declare_ps_input_vgprs(struct radv_shader_args *args) ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */ - if (args->options->remap_spi_ps_input) { + if (remap_spi_ps_input) { /* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then * communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the * VGPR arguments here. 
@@ -428,13 +436,14 @@ declare_ps_input_vgprs(struct radv_shader_args *args) } static void -declare_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs) +declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args, + bool has_api_gs) { if (has_api_gs) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_gs_state); } - if (args->shader_info->has_ngg_culling) { + if (info->has_ngg_culling) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_culling_settings); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[0]); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[1]); @@ -444,22 +453,22 @@ declare_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs) } static void -set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info, - uint8_t *user_sgpr_idx) +set_global_input_locs(struct radv_shader_info *info, struct radv_shader_args *args, + const struct user_sgpr_info *user_sgpr_info, uint8_t *user_sgpr_idx) { unsigned num_inline_push_consts = 0; if (!user_sgpr_info->indirect_all_descriptor_sets) { for (unsigned i = 0; i < ARRAY_SIZE(args->descriptor_sets); i++) { if (args->descriptor_sets[i].used) - set_loc_desc(args, i, user_sgpr_idx); + set_loc_desc(info, i, user_sgpr_idx); } } else { - set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx); + set_loc_shader_ptr(info, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx); } if (args->ac.push_constants.used) { - set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx); + set_loc_shader_ptr(info, AC_UD_PUSH_CONSTANTS, user_sgpr_idx); } for (unsigned i = 0; i < ARRAY_SIZE(args->ac.inline_push_consts); i++) { @@ -468,31 +477,31 @@ set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info } if (num_inline_push_consts) { - set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, num_inline_push_consts); + set_loc_shader(info, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, num_inline_push_consts); } if (args->streamout_buffers.used) { - set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx); + set_loc_shader_ptr(info, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx); } } static void -set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage, - bool has_previous_stage, gl_shader_stage previous_stage, - uint8_t *user_sgpr_idx) +set_vs_specific_input_locs(struct radv_shader_info *info, struct radv_shader_args *args, + gl_shader_stage stage, bool has_previous_stage, + gl_shader_stage previous_stage, uint8_t *user_sgpr_idx) { if (args->prolog_inputs.used) - set_loc_shader(args, AC_UD_VS_PROLOG_INPUTS, user_sgpr_idx, 2); + set_loc_shader(info, AC_UD_VS_PROLOG_INPUTS, user_sgpr_idx, 2); if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { if (args->ac.vertex_buffers.used) { - set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx); + set_loc_shader_ptr(info, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx); } unsigned vs_num = args->ac.base_vertex.used + args->ac.draw_id.used + args->ac.start_instance.used; - set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num); + set_loc_shader(info, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num); } } @@ -504,15 +513,17 @@ is_pre_gs_stage(gl_shader_stage stage) } void -radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, - bool has_previous_stage, gl_shader_stage previous_stage) 
+radv_declare_shader_args(const struct radv_nir_compiler_options *options, + struct radv_shader_info *info, gl_shader_stage stage, + bool has_previous_stage, gl_shader_stage previous_stage, + struct radv_shader_args *args) { struct user_sgpr_info user_sgpr_info; - bool needs_view_index = needs_view_index_sgpr(args, stage); + bool needs_view_index = needs_view_index_sgpr(options, info, stage); bool has_api_gs = stage == MESA_SHADER_GEOMETRY; - if (args->options->chip_class >= GFX10) { - if (is_pre_gs_stage(stage) && args->shader_info->is_ngg) { + if (options->chip_class >= GFX10) { + if (is_pre_gs_stage(stage) && info->is_ngg) { /* On GFX10, VS is merged into GS for NGG. */ previous_stage = stage; stage = MESA_SHADER_GEOMETRY; @@ -521,14 +532,14 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, } for (int i = 0; i < MAX_SETS; i++) - args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; + info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; for (int i = 0; i < AC_UD_MAX_UD; i++) - args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; + info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; - allocate_user_sgprs(args, stage, has_previous_stage, previous_stage, needs_view_index, - has_api_gs, &user_sgpr_info); + allocate_user_sgprs(options, info, stage, has_previous_stage, previous_stage, needs_view_index, + has_api_gs, args->is_gs_copy_shader, &user_sgpr_info); - if (args->options->explicit_scratch_args) { + if (options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets); } @@ -538,31 +549,31 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, switch (stage) { case MESA_SHADER_COMPUTE: - declare_global_input_sgprs(args, &user_sgpr_info); + declare_global_input_sgprs(info, &user_sgpr_info, args); - if (args->shader_info->cs.uses_sbt) { + if (info->cs.uses_sbt) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.sbt_descriptors); } - if (args->shader_info->cs.uses_grid_size) { + if (info->cs.uses_grid_size) { ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups); } - if (args->shader_info->cs.uses_ray_launch_size) { + if (info->cs.uses_ray_launch_size) { ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.ray_launch_size); } for (int i = 0; i < 3; i++) { - if (args->shader_info->cs.uses_block_id[i]) { + if (info->cs.uses_block_id[i]) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]); } } - if (args->shader_info->cs.uses_local_invocation_idx) { + if (info->cs.uses_local_invocation_idx) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size); } - if (args->options->explicit_scratch_args) { + if (options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); } @@ -570,29 +581,29 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, break; case MESA_SHADER_VERTEX: /* NGG is handled by the GS case */ - assert(!args->shader_info->is_ngg); + assert(!info->is_ngg); - declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage); + declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage); - declare_global_input_sgprs(args, &user_sgpr_info); + declare_global_input_sgprs(info, &user_sgpr_info, args); if (needs_view_index) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index); } - if (args->shader_info->vs.as_es) { + if 
(info->vs.as_es) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset); - } else if (args->shader_info->vs.as_ls) { + } else if (info->vs.as_ls) { /* no extra parameters */ } else { - declare_streamout_sgprs(args, stage); + declare_streamout_sgprs(info, args, stage); } - if (args->options->explicit_scratch_args) { + if (options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); } - declare_vs_input_vgprs(args); + declare_vs_input_vgprs(options, info, args); break; case MESA_SHADER_TESS_CTRL: if (has_previous_stage) { @@ -605,9 +616,9 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown - declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage); + declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage); - declare_global_input_sgprs(args, &user_sgpr_info); + declare_global_input_sgprs(info, &user_sgpr_info, args); if (needs_view_index) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index); @@ -616,9 +627,9 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids); - declare_vs_input_vgprs(args); + declare_vs_input_vgprs(options, info, args); } else { - declare_global_input_sgprs(args, &user_sgpr_info); + declare_global_input_sgprs(info, &user_sgpr_info, args); if (needs_view_index) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index); @@ -626,7 +637,7 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset); - if (args->options->explicit_scratch_args) { + if (options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); } ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id); @@ -635,22 +646,22 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, break; case MESA_SHADER_TESS_EVAL: /* NGG is handled by the GS case */ - assert(!args->shader_info->is_ngg); + assert(!info->is_ngg); - declare_global_input_sgprs(args, &user_sgpr_info); + declare_global_input_sgprs(info, &user_sgpr_info, args); if (needs_view_index) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index); - if (args->shader_info->tes.as_es) { + if (info->tes.as_es) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset); } else { - declare_streamout_sgprs(args, stage); + declare_streamout_sgprs(info, args, stage); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset); } - if (args->options->explicit_scratch_args) { + if (options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); } declare_tes_input_vgprs(args); @@ -658,7 +669,7 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, case MESA_SHADER_GEOMETRY: if (has_previous_stage) { // First 6 system regs - if 
(args->shader_info->is_ngg) { + if (info->is_ngg) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info); } else { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset); @@ -672,17 +683,17 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown if (previous_stage != MESA_SHADER_TESS_EVAL) { - declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage); + declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage); } - declare_global_input_sgprs(args, &user_sgpr_info); + declare_global_input_sgprs(info, &user_sgpr_info, args); if (needs_view_index) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index); } - if (args->shader_info->is_ngg) { - declare_ngg_sgprs(args, has_api_gs); + if (info->is_ngg) { + declare_ngg_sgprs(info, args, has_api_gs); } ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]); @@ -692,12 +703,12 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]); if (previous_stage == MESA_SHADER_VERTEX) { - declare_vs_input_vgprs(args); + declare_vs_input_vgprs(options, info, args); } else { declare_tes_input_vgprs(args); } } else { - declare_global_input_sgprs(args, &user_sgpr_info); + declare_global_input_sgprs(info, &user_sgpr_info, args); if (needs_view_index) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index); @@ -705,7 +716,7 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id); - if (args->options->explicit_scratch_args) { + if (options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); } ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]); @@ -719,27 +730,27 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, } break; case MESA_SHADER_FRAGMENT: - declare_global_input_sgprs(args, &user_sgpr_info); + declare_global_input_sgprs(info, &user_sgpr_info, args); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask); - if (args->options->explicit_scratch_args) { + if (options->explicit_scratch_args) { ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset); } - declare_ps_input_vgprs(args); + declare_ps_input_vgprs(info, args, options->remap_spi_ps_input); break; default: unreachable("Shader stage not implemented"); } - args->shader_info->num_input_vgprs = 0; - args->shader_info->num_input_sgprs = 2; - args->shader_info->num_input_sgprs += args->ac.num_sgprs_used; - args->shader_info->num_input_vgprs = args->ac.num_vgprs_used; + info->num_input_vgprs = 0; + info->num_input_sgprs = 2; + info->num_input_sgprs += args->ac.num_sgprs_used; + info->num_input_vgprs = args->ac.num_vgprs_used; uint8_t user_sgpr_idx = 0; - set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx); + set_loc_shader_ptr(info, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx); /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including * the rw_buffers at s0/s1. 
With user SGPR0 = s8, lets restart the count from 0 */ @@ -747,51 +758,51 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, user_sgpr_idx = 0; if (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX)) - set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx); + set_vs_specific_input_locs(info, args, stage, has_previous_stage, previous_stage, &user_sgpr_idx); - set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx); + set_global_input_locs(info, args, &user_sgpr_info, &user_sgpr_idx); switch (stage) { case MESA_SHADER_COMPUTE: if (args->ac.sbt_descriptors.used) { - set_loc_shader_ptr(args, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx); + set_loc_shader_ptr(info, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx); } if (args->ac.num_work_groups.used) { - set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, 3); + set_loc_shader(info, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, 3); } if (args->ac.ray_launch_size.used) { - set_loc_shader(args, AC_UD_CS_RAY_LAUNCH_SIZE, &user_sgpr_idx, 3); + set_loc_shader(info, AC_UD_CS_RAY_LAUNCH_SIZE, &user_sgpr_idx, 3); } break; case MESA_SHADER_VERTEX: if (args->ac.view_index.used) - set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + set_loc_shader(info, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); break; case MESA_SHADER_TESS_CTRL: if (args->ac.view_index.used) - set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + set_loc_shader(info, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); break; case MESA_SHADER_TESS_EVAL: if (args->ac.view_index.used) - set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + set_loc_shader(info, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); break; case MESA_SHADER_GEOMETRY: if (args->ac.view_index.used) - set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); + set_loc_shader(info, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); if (args->ngg_gs_state.used) { - set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1); + set_loc_shader(info, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1); } if (args->ngg_culling_settings.used) { - set_loc_shader(args, AC_UD_NGG_CULLING_SETTINGS, &user_sgpr_idx, 1); + set_loc_shader(info, AC_UD_NGG_CULLING_SETTINGS, &user_sgpr_idx, 1); } if (args->ngg_viewport_scale[0].used) { assert(args->ngg_viewport_scale[1].used && args->ngg_viewport_translate[0].used && args->ngg_viewport_translate[1].used); - set_loc_shader(args, AC_UD_NGG_VIEWPORT, &user_sgpr_idx, 4); + set_loc_shader(info, AC_UD_NGG_VIEWPORT, &user_sgpr_idx, 4); } break; case MESA_SHADER_FRAGMENT: @@ -800,5 +811,5 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, unreachable("Shader stage not implemented"); } - args->shader_info->num_user_sgprs = user_sgpr_idx; + info->num_user_sgprs = user_sgpr_idx; } diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h index a6d4b8b..6aa98a6 100644 --- a/src/amd/vulkan/radv_shader_args.h +++ b/src/amd/vulkan/radv_shader_args.h @@ -30,8 +30,6 @@ struct radv_shader_args { struct ac_shader_args ac; - struct radv_shader_info *shader_info; - const struct radv_nir_compiler_options *options; struct ac_arg descriptor_sets[MAX_SETS]; struct ac_arg ring_offsets; @@ -58,5 +56,10 @@ radv_shader_args_from_ac(struct ac_shader_args *args) return container_of(args, struct radv_shader_args, ac); } -void radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, - bool has_previous_stage, gl_shader_stage previous_stage); +struct radv_nir_compiler_options; 
+struct radv_shader_info;
+
+void radv_declare_shader_args(const struct radv_nir_compiler_options *options,
+                              struct radv_shader_info *info, gl_shader_stage stage,
+                              bool has_previous_stage, gl_shader_stage previous_stage,
+                              struct radv_shader_args *args);
-- 
2.7.4
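
Note (not part of the patch): after this change, compiler options and shader info are no longer stashed inside radv_shader_args; callers pass them alongside it. The snippet below is a minimal illustrative sketch of the new calling convention, mirroring the radv_shader.c hunk above. The helper name declare_args_example and the exact includes are assumptions; it is meant to be built in-tree against the RADV headers.

    /* Sketch only: shows the decoupled radv_declare_shader_args() call.
     * Assumes the in-tree RADV headers provide radv_shader_info,
     * radv_nir_compiler_options and radv_shader_args. */
    #include "radv_shader.h"
    #include "radv_shader_args.h"

    static void
    declare_args_example(const struct radv_nir_compiler_options *options,
                         struct radv_shader_info *info,
                         struct nir_shader *const *shaders, unsigned shader_count)
    {
       struct radv_shader_args args = {0};

       /* Per-compile flags still live in radv_shader_args... */
       args.is_gs_copy_shader = false;

       /* ...but options/info are now explicit parameters instead of args members,
        * and the declared arguments are written into &args. */
       radv_declare_shader_args(options, info,
                                shaders[shader_count - 1]->info.stage,
                                shader_count >= 2,
                                shader_count >= 2 ? shaders[shader_count - 2]->info.stage
                                                  : MESA_SHADER_VERTEX,
                                &args);
    }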