From ed8250360f36db84e07885c2712e10f5ed7f9550 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 31 May 2023 14:19:14 -0400 Subject: [PATCH] amd: add radeon_info* into ac_llvm_context and radv_nir_compiler_options Reviewed-by: Qiang Yu Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_shader_util.c | 10 +++++----- src/amd/common/ac_shader_util.h | 2 +- src/amd/llvm/ac_llvm_build.c | 20 ++++++++++---------- src/amd/llvm/ac_llvm_build.h | 11 +++++------ src/amd/llvm/ac_nir_to_llvm.c | 10 +++++----- src/amd/llvm/ac_shader_abi.h | 3 --- src/amd/vulkan/radv_aco_shader_info.h | 8 ++++---- src/amd/vulkan/radv_nir_to_llvm.c | 12 +++++------- src/amd/vulkan/radv_shader.c | 9 ++------- src/amd/vulkan/radv_shader.h | 7 +------ src/gallium/drivers/radeonsi/si_shader_llvm.c | 4 +--- 11 files changed, 39 insertions(+), 57 deletions(-) diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index fbdb0d0..fa264a2 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -1052,7 +1052,7 @@ enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr *inst * "access" must be a result of ac_get_mem_access_flags() with the appropriate ACCESS_TYPE_* * flags set. */ -union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level, +union ac_hw_cache_flags ac_get_hw_cache_flags(const struct radeon_info *info, enum gl_access_qualifier access) { union ac_hw_cache_flags result; @@ -1066,7 +1066,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level, bool scope_is_device = access & (ACCESS_COHERENT | ACCESS_VOLATILE); - if (gfx_level >= GFX11) { + if (info->gfx_level >= GFX11) { /* GFX11 simplified it and exposes what is actually useful. * * GLC means device scope for loads only. (stores and atomics are always device scope) @@ -1080,7 +1080,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level, if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_TYPE_SMEM)) result.value |= ac_slc; - } else if (gfx_level >= GFX10) { + } else if (info->gfx_level >= GFX10) { /* GFX10-10.3: * * VMEM and SMEM loads (SMEM only supports the first four): @@ -1134,7 +1134,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level, */ if (scope_is_device && !(access & ACCESS_TYPE_ATOMIC)) { /* SMEM doesn't support the device scope on GFX6-7. */ - assert(gfx_level >= GFX8 || !(access & ACCESS_TYPE_SMEM)); + assert(info->gfx_level >= GFX8 || !(access & ACCESS_TYPE_SMEM)); result.value |= ac_glc; } @@ -1144,7 +1144,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level, /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All store opcodes not * aligned to a dword are affected. */ - if (gfx_level == GFX6 && access & ACCESS_MAY_STORE_SUBDWORD) + if (info->gfx_level == GFX6 && access & ACCESS_MAY_STORE_SUBDWORD) result.value |= ac_glc; } diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h index a11debb..98a727f 100644 --- a/src/amd/common/ac_shader_util.h +++ b/src/amd/common/ac_shader_util.h @@ -221,7 +221,7 @@ ac_ngg_get_scratch_lds_size(gl_shader_stage stage, enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr *instr); -union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level, +union ac_hw_cache_flags ac_get_hw_cache_flags(const struct radeon_info *info, enum gl_access_qualifier access); #ifdef __cplusplus diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index ba2d8cd..b1bf94a 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -5,7 +5,7 @@ */ /* based on pieces from si_pipe.c and radeon_llvm_emit.c */ #include "ac_llvm_build.h" - +#include "ac_gpu_info.h" #include "ac_nir.h" #include "ac_llvm_util.h" #include "ac_shader_util.h" @@ -37,16 +37,14 @@ struct ac_llvm_flow { * The caller is responsible for initializing ctx::module and ctx::builder. */ void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler, - enum amd_gfx_level gfx_level, enum radeon_family family, - bool has_3d_cube_border_color_mipmap, - enum ac_float_mode float_mode, unsigned wave_size, - unsigned ballot_mask_bits, bool exports_color_null, bool exports_mrtz) + const struct radeon_info *info, enum ac_float_mode float_mode, + unsigned wave_size, unsigned ballot_mask_bits, bool exports_color_null, + bool exports_mrtz) { ctx->context = LLVMContextCreate(); - ctx->gfx_level = gfx_level; - ctx->family = family; - ctx->has_3d_cube_border_color_mipmap = has_3d_cube_border_color_mipmap; + ctx->info = info; + ctx->gfx_level = info->gfx_level; ctx->wave_size = wave_size; ctx->ballot_mask_bits = ballot_mask_bits; ctx->float_mode = float_mode; @@ -1016,7 +1014,7 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx, static unsigned get_cache_flags(struct ac_llvm_context *ctx, enum gl_access_qualifier access) { - return ac_get_hw_cache_flags(ctx->gfx_level, access).value; + return ac_get_hw_cache_flags(ctx->info, access).value; } static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc, @@ -3752,7 +3750,9 @@ void ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, LLVMValueR /* GFX6 (except OLAND and HAINAN) has a bug that it only looks * at the X writemask component. */ - if (ctx->gfx_level == GFX6 && ctx->family != CHIP_OLAND && ctx->family != CHIP_HAINAN) + if (ctx->gfx_level == GFX6 && + ctx->info->family != CHIP_OLAND && + ctx->info->family != CHIP_HAINAN) mask |= 0x1; /* Specify which components to enable */ diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index b91aeca..3689259 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -37,6 +37,7 @@ enum struct ac_llvm_flow; struct ac_llvm_compiler; +struct radeon_info; struct ac_llvm_flow_state { struct ac_llvm_flow *stack; @@ -129,9 +130,8 @@ struct ac_llvm_context { unsigned uniform_md_kind; LLVMValueRef empty_md; + const struct radeon_info *info; enum amd_gfx_level gfx_level; - enum radeon_family family; - bool has_3d_cube_border_color_mipmap; unsigned wave_size; unsigned ballot_mask_bits; @@ -148,10 +148,9 @@ struct ac_llvm_context { }; void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler, - enum amd_gfx_level gfx_level, enum radeon_family family, - bool has_3d_cube_border_color_mipmap, - enum ac_float_mode float_mode, unsigned wave_size, - unsigned ballot_mask_bits, bool exports_color_null, bool exports_mrtz); + const struct radeon_info *info, enum ac_float_mode float_mode, + unsigned wave_size, unsigned ballot_mask_bits, bool exports_color_null, + bool exports_mrtz); void ac_llvm_context_dispose(struct ac_llvm_context *ctx); diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index c318502..548c7eb 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -1532,7 +1532,7 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_te } /* MI200 doesn't have image_sample_lz, but image_sample behaves like lz. */ - if (!ctx->ac.has_3d_cube_border_color_mipmap) + if (!ctx->ac.info->has_3d_cube_border_color_mipmap) args->level_zero = false; if (instr->op == nir_texop_tg4 && ctx->ac.gfx_level <= GFX8 && @@ -1933,7 +1933,7 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_ } unsigned cache_flags = - ac_get_hw_cache_flags(ctx->ac.gfx_level, + ac_get_hw_cache_flags(ctx->ac.info, ac_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value; params[arg_count++] = data; @@ -2599,7 +2599,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int LLVMTypeRef data_type = LLVMTypeOf(params[0]); char type[8]; unsigned cache_flags = - ac_get_hw_cache_flags(ctx->ac.gfx_level, + ac_get_hw_cache_flags(ctx->ac.info, ac_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value; params[param_count++] = ctx->ac.i32_0; /* soffset */ @@ -3665,7 +3665,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins const unsigned align_mul = nir_intrinsic_align_mul(instr); const enum pipe_format format = nir_intrinsic_format(instr); const struct ac_vtx_format_info *vtx_info = - ac_get_vtx_format_info(ctx->ac.gfx_level, ctx->ac.family, format); + ac_get_vtx_format_info(ctx->ac.gfx_level, ctx->ac.info->family, format); result = ac_build_safe_tbuffer_load(&ctx->ac, descriptor, vidx, addr_voffset, addr_soffset, @@ -4211,7 +4211,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) } /* Set TRUNC_COORD=0 for textureGather(). */ - if (instr->op == nir_texop_tg4 && !ctx->abi->conformant_trunc_coord) { + if (instr->op == nir_texop_tg4 && !ctx->ac.info->conformant_trunc_coord) { LLVMValueRef dword0 = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, ctx->ac.i32_0, ""); dword0 = LLVMBuildAnd(ctx->ac.builder, dword0, LLVMConstInt(ctx->ac.i32, C_008F30_TRUNC_COORD, 0), ""); args.sampler = LLVMBuildInsertElement(ctx->ac.builder, args.sampler, dword0, ctx->ac.i32_0, ""); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index 02ec21d..5b6f5a8 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -95,9 +95,6 @@ struct ac_shader_abi { /* Whether to disable anisotropic filtering. */ bool disable_aniso_single_level; - - /* Equal to radeon_info::conformant_trunc_coord. */ - bool conformant_trunc_coord; }; #endif /* AC_SHADER_ABI_H */ diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h index 850f509..88c9730 100644 --- a/src/amd/vulkan/radv_aco_shader_info.h +++ b/src/amd/vulkan/radv_aco_shader_info.h @@ -112,18 +112,18 @@ radv_aco_convert_opts(struct aco_compiler_options *aco_info, ASSIGN_FIELD(dump_preoptir); ASSIGN_FIELD(record_ir); ASSIGN_FIELD(record_stats); - ASSIGN_FIELD(has_ls_vgpr_init_bug); ASSIGN_FIELD(enable_mrt_output_nan_fixup); ASSIGN_FIELD(wgp_mode); - ASSIGN_FIELD(family); - ASSIGN_FIELD(gfx_level); - ASSIGN_FIELD(address32_hi); ASSIGN_FIELD(debug.func); ASSIGN_FIELD(debug.private_data); ASSIGN_FIELD(debug.private_data); aco_info->is_opengl = false; aco_info->load_grid_size_from_user_sgpr = radv_args->load_grid_size_from_user_sgpr; aco_info->optimisations_disabled = radv->key.optimisations_disabled; + aco_info->gfx_level = radv->info->gfx_level; + aco_info->family = radv->info->family; + aco_info->address32_hi = radv->info->address32_hi; + aco_info->has_ls_vgpr_init_bug = radv->info->has_ls_vgpr_init_bug; } #undef ASSIGN_VS_STATE_FIELD #undef ASSIGN_VS_STATE_FIELD_CP diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 81b1931..0378950 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -74,9 +74,9 @@ create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuil { struct ac_llvm_pointer main_function = ac_build_main(args, ctx, convention, "main", ctx->voidt, module); - if (options->address32_hi) { + if (options->info->address32_hi) { ac_llvm_add_target_dep_function_attr(main_function.value, "amdgpu-32bit-address-high-bits", - options->address32_hi); + options->info->address32_hi); } ac_llvm_set_workgroup_size(main_function.value, max_workgroup_size); @@ -335,8 +335,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, exports_color_null = !exports_mrtz || (shaders[0]->info.outputs_written & (0xffu << FRAG_RESULT_DATA0)); } - ac_llvm_context_init(&ctx.ac, ac_llvm, options->gfx_level, options->family, - options->has_3d_cube_border_color_mipmap, + ac_llvm_context_init(&ctx.ac, ac_llvm, options->info, float_mode, info->wave_size, info->ballot_bit_size, exports_color_null, exports_mrtz); uint32_t length = 1; @@ -373,7 +372,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ctx.abi.clamp_shadow_reference = false; ctx.abi.robust_buffer_access = options->robust_buffer_access; ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr; - ctx.abi.conformant_trunc_coord = options->conformant_trunc_coord; bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg; if (shader_count >= 2 || is_ngg) @@ -386,7 +384,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, if (args->ac.instance_id.used) ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id); - if (options->has_ls_vgpr_init_bug && + if (options->info->has_ls_vgpr_init_bug && shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL) ac_fixup_ls_hs_input_vgprs(&ctx.ac, &ctx.abi, &args->ac); @@ -602,7 +600,7 @@ llvm_compile_shader(const struct radv_nir_compiler_options *options, if (options->check_ir) tm_options |= AC_TM_CHECK_IR; - radv_init_llvm_compiler(&ac_llvm, options->family, tm_options, info->wave_size); + radv_init_llvm_compiler(&ac_llvm, options->info->family, tm_options, info->wave_size); radv_compile_nir_shader(&ac_llvm, options, info, binary, args, shaders, shader_count); } diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 2e5d9dc..db5be8f 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -2124,18 +2124,13 @@ radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options, options->robust_buffer_access = device->robust_buffer_access; options->wgp_mode = should_use_wgp; - options->family = device->physical_device->rad_info.family; - options->gfx_level = device->physical_device->rad_info.gfx_level; - options->has_3d_cube_border_color_mipmap = device->physical_device->rad_info.has_3d_cube_border_color_mipmap; - options->conformant_trunc_coord = device->physical_device->rad_info.conformant_trunc_coord; + options->info = &device->physical_device->rad_info; options->dump_shader = can_dump_shader; options->dump_preoptir = options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR; options->record_ir = keep_shader_info; options->record_stats = keep_statistic_info; options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR; - options->address32_hi = device->physical_device->rad_info.address32_hi; - options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug; if (!is_meta_shader) options->enable_mrt_output_nan_fixup = options->key.ps.epilog.enable_mrt_output_nan_fixup; @@ -2362,7 +2357,7 @@ radv_create_rt_prolog(struct radv_device *device) radv_declare_shader_args(device, &pipeline_key, &info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, RADV_SHADER_TYPE_DEFAULT, &in_args); - radv_declare_rt_shader_args(options.gfx_level, &out_args); + radv_declare_rt_shader_args(options.info->gfx_level, &out_args); info.user_sgprs_locs = in_args.user_sgprs_locs; #ifdef LLVM_AVAILABLE diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index b5da8be2..f8cb846 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -133,14 +133,9 @@ struct radv_nir_compiler_options { bool record_ir; bool record_stats; bool check_ir; - bool has_ls_vgpr_init_bug; uint8_t enable_mrt_output_nan_fixup; bool wgp_mode; - enum radeon_family family; - enum amd_gfx_level gfx_level; - uint32_t address32_hi; - bool has_3d_cube_border_color_mipmap; - bool conformant_trunc_coord; + const struct radeon_info *info; struct { void (*func)(void *private_data, enum aco_compiler_debug_level level, const char *message); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 1c5a11d..7521063 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -116,8 +116,7 @@ void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscre ctx->screen = sscreen; ctx->compiler = compiler; - ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.gfx_level, sscreen->info.family, - sscreen->info.has_3d_cube_border_color_mipmap, float_mode, + ac_llvm_context_init(&ctx->ac, compiler, &sscreen->info, float_mode, wave_size, 64, exports_color_null, exports_mrtz); } @@ -955,7 +954,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero || info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO; ctx->abi.disable_aniso_single_level = true; - ctx->abi.conformant_trunc_coord = ctx->screen->info.conformant_trunc_coord; bool ls_need_output = ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.as_ls && -- 2.7.4