amd: add radeon_info* into ac_llvm_context and radv_nir_compiler_options
authorMarek Olšák <marek.olsak@amd.com>
Wed, 31 May 2023 18:19:14 +0000 (14:19 -0400)
committerMarge Bot <emma+marge@anholt.net>
Tue, 6 Jun 2023 18:01:35 +0000 (18:01 +0000)
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22833>

src/amd/common/ac_shader_util.c
src/amd/common/ac_shader_util.h
src/amd/llvm/ac_llvm_build.c
src/amd/llvm/ac_llvm_build.h
src/amd/llvm/ac_nir_to_llvm.c
src/amd/llvm/ac_shader_abi.h
src/amd/vulkan/radv_aco_shader_info.h
src/amd/vulkan/radv_nir_to_llvm.c
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader.h
src/gallium/drivers/radeonsi/si_shader_llvm.c

index fbdb0d0..fa264a2 100644 (file)
@@ -1052,7 +1052,7 @@ enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr *inst
  * "access" must be a result of ac_get_mem_access_flags() with the appropriate ACCESS_TYPE_*
  * flags set.
  */
-union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
+union ac_hw_cache_flags ac_get_hw_cache_flags(const struct radeon_info *info,
                                               enum gl_access_qualifier access)
 {
    union ac_hw_cache_flags result;
@@ -1066,7 +1066,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
 
    bool scope_is_device = access & (ACCESS_COHERENT | ACCESS_VOLATILE);
 
-   if (gfx_level >= GFX11) {
+   if (info->gfx_level >= GFX11) {
       /* GFX11 simplified it and exposes what is actually useful.
        *
        * GLC means device scope for loads only. (stores and atomics are always device scope)
@@ -1080,7 +1080,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
 
       if (access & ACCESS_NON_TEMPORAL && !(access & ACCESS_TYPE_SMEM))
          result.value |= ac_slc;
-   } else if (gfx_level >= GFX10) {
+   } else if (info->gfx_level >= GFX10) {
       /* GFX10-10.3:
        *
        * VMEM and SMEM loads (SMEM only supports the first four):
@@ -1134,7 +1134,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
        */
       if (scope_is_device && !(access & ACCESS_TYPE_ATOMIC)) {
          /* SMEM doesn't support the device scope on GFX6-7. */
-         assert(gfx_level >= GFX8 || !(access & ACCESS_TYPE_SMEM));
+         assert(info->gfx_level >= GFX8 || !(access & ACCESS_TYPE_SMEM));
          result.value |= ac_glc;
       }
 
@@ -1144,7 +1144,7 @@ union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
       /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All store opcodes not
        * aligned to a dword are affected.
        */
-      if (gfx_level == GFX6 && access & ACCESS_MAY_STORE_SUBDWORD)
+      if (info->gfx_level == GFX6 && access & ACCESS_MAY_STORE_SUBDWORD)
          result.value |= ac_glc;
    }
 
index a11debb..98a727f 100644 (file)
@@ -221,7 +221,7 @@ ac_ngg_get_scratch_lds_size(gl_shader_stage stage,
 
 enum gl_access_qualifier ac_get_mem_access_flags(const nir_intrinsic_instr *instr);
 
-union ac_hw_cache_flags ac_get_hw_cache_flags(enum amd_gfx_level gfx_level,
+union ac_hw_cache_flags ac_get_hw_cache_flags(const struct radeon_info *info,
                                               enum gl_access_qualifier access);
 
 #ifdef __cplusplus
index ba2d8cd..b1bf94a 100644 (file)
@@ -5,7 +5,7 @@
  */
 /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
 #include "ac_llvm_build.h"
-
+#include "ac_gpu_info.h"
 #include "ac_nir.h"
 #include "ac_llvm_util.h"
 #include "ac_shader_util.h"
@@ -37,16 +37,14 @@ struct ac_llvm_flow {
  * The caller is responsible for initializing ctx::module and ctx::builder.
  */
 void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler,
-                          enum amd_gfx_level gfx_level, enum radeon_family family,
-                          bool has_3d_cube_border_color_mipmap,
-                          enum ac_float_mode float_mode, unsigned wave_size,
-                          unsigned ballot_mask_bits, bool exports_color_null, bool exports_mrtz)
+                          const struct radeon_info *info, enum ac_float_mode float_mode,
+                          unsigned wave_size, unsigned ballot_mask_bits, bool exports_color_null,
+                          bool exports_mrtz)
 {
    ctx->context = LLVMContextCreate();
 
-   ctx->gfx_level = gfx_level;
-   ctx->family = family;
-   ctx->has_3d_cube_border_color_mipmap = has_3d_cube_border_color_mipmap;
+   ctx->info = info;
+   ctx->gfx_level = info->gfx_level;
    ctx->wave_size = wave_size;
    ctx->ballot_mask_bits = ballot_mask_bits;
    ctx->float_mode = float_mode;
@@ -1016,7 +1014,7 @@ LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
 
 static unsigned get_cache_flags(struct ac_llvm_context *ctx, enum gl_access_qualifier access)
 {
-   return ac_get_hw_cache_flags(ctx->gfx_level, access).value;
+   return ac_get_hw_cache_flags(ctx->info, access).value;
 }
 
 static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
@@ -3752,7 +3750,9 @@ void ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, LLVMValueR
 
    /* GFX6 (except OLAND and HAINAN) has a bug that it only looks
     * at the X writemask component. */
-   if (ctx->gfx_level == GFX6 && ctx->family != CHIP_OLAND && ctx->family != CHIP_HAINAN)
+   if (ctx->gfx_level == GFX6 &&
+       ctx->info->family != CHIP_OLAND &&
+       ctx->info->family != CHIP_HAINAN)
       mask |= 0x1;
 
    /* Specify which components to enable */
index b91aeca..3689259 100644 (file)
@@ -37,6 +37,7 @@ enum
 
 struct ac_llvm_flow;
 struct ac_llvm_compiler;
+struct radeon_info;
 
 struct ac_llvm_flow_state {
    struct ac_llvm_flow *stack;
@@ -129,9 +130,8 @@ struct ac_llvm_context {
    unsigned uniform_md_kind;
    LLVMValueRef empty_md;
 
+   const struct radeon_info *info;
    enum amd_gfx_level gfx_level;
-   enum radeon_family family;
-   bool has_3d_cube_border_color_mipmap;
 
    unsigned wave_size;
    unsigned ballot_mask_bits;
@@ -148,10 +148,9 @@ struct ac_llvm_context {
 };
 
 void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler,
-                          enum amd_gfx_level gfx_level, enum radeon_family family,
-                          bool has_3d_cube_border_color_mipmap,
-                          enum ac_float_mode float_mode, unsigned wave_size,
-                          unsigned ballot_mask_bits, bool exports_color_null, bool exports_mrtz);
+                          const struct radeon_info *info, enum ac_float_mode float_mode,
+                          unsigned wave_size, unsigned ballot_mask_bits, bool exports_color_null,
+                          bool exports_mrtz);
 
 void ac_llvm_context_dispose(struct ac_llvm_context *ctx);
 
index c318502..548c7eb 100644 (file)
@@ -1532,7 +1532,7 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_te
    }
 
    /* MI200 doesn't have image_sample_lz, but image_sample behaves like lz. */
-   if (!ctx->ac.has_3d_cube_border_color_mipmap)
+   if (!ctx->ac.info->has_3d_cube_border_color_mipmap)
       args->level_zero = false;
 
    if (instr->op == nir_texop_tg4 && ctx->ac.gfx_level <= GFX8 &&
@@ -1933,7 +1933,7 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_
       }
 
       unsigned cache_flags =
-         ac_get_hw_cache_flags(ctx->ac.gfx_level,
+         ac_get_hw_cache_flags(ctx->ac.info,
                               ac_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
 
       params[arg_count++] = data;
@@ -2599,7 +2599,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_int
          LLVMTypeRef data_type = LLVMTypeOf(params[0]);
          char type[8];
          unsigned cache_flags =
-            ac_get_hw_cache_flags(ctx->ac.gfx_level,
+            ac_get_hw_cache_flags(ctx->ac.info,
                                  ac_get_mem_access_flags(instr) | ACCESS_TYPE_ATOMIC).value;
 
          params[param_count++] = ctx->ac.i32_0; /* soffset */
@@ -3665,7 +3665,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
             const unsigned align_mul = nir_intrinsic_align_mul(instr);
             const enum pipe_format format = nir_intrinsic_format(instr);
             const struct ac_vtx_format_info *vtx_info =
-               ac_get_vtx_format_info(ctx->ac.gfx_level, ctx->ac.family, format);
+               ac_get_vtx_format_info(ctx->ac.gfx_level, ctx->ac.info->family, format);
 
             result =
                ac_build_safe_tbuffer_load(&ctx->ac, descriptor, vidx, addr_voffset, addr_soffset,
@@ -4211,7 +4211,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
    }
 
    /* Set TRUNC_COORD=0 for textureGather(). */
-   if (instr->op == nir_texop_tg4 && !ctx->abi->conformant_trunc_coord) {
+   if (instr->op == nir_texop_tg4 && !ctx->ac.info->conformant_trunc_coord) {
       LLVMValueRef dword0 = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, ctx->ac.i32_0, "");
       dword0 = LLVMBuildAnd(ctx->ac.builder, dword0, LLVMConstInt(ctx->ac.i32, C_008F30_TRUNC_COORD, 0), "");
       args.sampler = LLVMBuildInsertElement(ctx->ac.builder, args.sampler, dword0, ctx->ac.i32_0, "");
index 02ec21d..5b6f5a8 100644 (file)
@@ -95,9 +95,6 @@ struct ac_shader_abi {
 
    /* Whether to disable anisotropic filtering. */
    bool disable_aniso_single_level;
-
-   /* Equal to radeon_info::conformant_trunc_coord. */
-   bool conformant_trunc_coord;
 };
 
 #endif /* AC_SHADER_ABI_H */
index 850f509..88c9730 100644 (file)
@@ -112,18 +112,18 @@ radv_aco_convert_opts(struct aco_compiler_options *aco_info,
    ASSIGN_FIELD(dump_preoptir);
    ASSIGN_FIELD(record_ir);
    ASSIGN_FIELD(record_stats);
-   ASSIGN_FIELD(has_ls_vgpr_init_bug);
    ASSIGN_FIELD(enable_mrt_output_nan_fixup);
    ASSIGN_FIELD(wgp_mode);
-   ASSIGN_FIELD(family);
-   ASSIGN_FIELD(gfx_level);
-   ASSIGN_FIELD(address32_hi);
    ASSIGN_FIELD(debug.func);
    ASSIGN_FIELD(debug.private_data);
    ASSIGN_FIELD(debug.private_data);
    aco_info->is_opengl = false;
    aco_info->load_grid_size_from_user_sgpr = radv_args->load_grid_size_from_user_sgpr;
    aco_info->optimisations_disabled = radv->key.optimisations_disabled;
+   aco_info->gfx_level = radv->info->gfx_level;
+   aco_info->family = radv->info->family;
+   aco_info->address32_hi = radv->info->address32_hi;
+   aco_info->has_ls_vgpr_init_bug = radv->info->has_ls_vgpr_init_bug;
 }
 #undef ASSIGN_VS_STATE_FIELD
 #undef ASSIGN_VS_STATE_FIELD_CP
index 81b1931..0378950 100644 (file)
@@ -74,9 +74,9 @@ create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuil
 {
    struct ac_llvm_pointer main_function = ac_build_main(args, ctx, convention, "main", ctx->voidt, module);
 
-   if (options->address32_hi) {
+   if (options->info->address32_hi) {
       ac_llvm_add_target_dep_function_attr(main_function.value, "amdgpu-32bit-address-high-bits",
-                                           options->address32_hi);
+                                           options->info->address32_hi);
    }
 
    ac_llvm_set_workgroup_size(main_function.value, max_workgroup_size);
@@ -335,8 +335,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
       exports_color_null = !exports_mrtz || (shaders[0]->info.outputs_written & (0xffu << FRAG_RESULT_DATA0));
    }
 
-   ac_llvm_context_init(&ctx.ac, ac_llvm, options->gfx_level, options->family,
-                        options->has_3d_cube_border_color_mipmap,
+   ac_llvm_context_init(&ctx.ac, ac_llvm, options->info,
                         float_mode, info->wave_size, info->ballot_bit_size, exports_color_null, exports_mrtz);
 
    uint32_t length = 1;
@@ -373,7 +372,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
    ctx.abi.clamp_shadow_reference = false;
    ctx.abi.robust_buffer_access = options->robust_buffer_access;
    ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr;
-   ctx.abi.conformant_trunc_coord = options->conformant_trunc_coord;
 
    bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg;
    if (shader_count >= 2 || is_ngg)
@@ -386,7 +384,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
    if (args->ac.instance_id.used)
       ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id);
 
-   if (options->has_ls_vgpr_init_bug &&
+   if (options->info->has_ls_vgpr_init_bug &&
        shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
       ac_fixup_ls_hs_input_vgprs(&ctx.ac, &ctx.abi, &args->ac);
 
@@ -602,7 +600,7 @@ llvm_compile_shader(const struct radv_nir_compiler_options *options,
    if (options->check_ir)
       tm_options |= AC_TM_CHECK_IR;
 
-   radv_init_llvm_compiler(&ac_llvm, options->family, tm_options, info->wave_size);
+   radv_init_llvm_compiler(&ac_llvm, options->info->family, tm_options, info->wave_size);
 
    radv_compile_nir_shader(&ac_llvm, options, info, binary, args, shaders, shader_count);
 }
index 2e5d9dc..db5be8f 100644 (file)
@@ -2124,18 +2124,13 @@ radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options,
 
    options->robust_buffer_access = device->robust_buffer_access;
    options->wgp_mode = should_use_wgp;
-   options->family = device->physical_device->rad_info.family;
-   options->gfx_level = device->physical_device->rad_info.gfx_level;
-   options->has_3d_cube_border_color_mipmap = device->physical_device->rad_info.has_3d_cube_border_color_mipmap;
-   options->conformant_trunc_coord = device->physical_device->rad_info.conformant_trunc_coord;
+   options->info = &device->physical_device->rad_info;
    options->dump_shader = can_dump_shader;
    options->dump_preoptir =
       options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
    options->record_ir = keep_shader_info;
    options->record_stats = keep_statistic_info;
    options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR;
-   options->address32_hi = device->physical_device->rad_info.address32_hi;
-   options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
 
    if (!is_meta_shader)
       options->enable_mrt_output_nan_fixup = options->key.ps.epilog.enable_mrt_output_nan_fixup;
@@ -2362,7 +2357,7 @@ radv_create_rt_prolog(struct radv_device *device)
 
    radv_declare_shader_args(device, &pipeline_key, &info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE,
                             RADV_SHADER_TYPE_DEFAULT, &in_args);
-   radv_declare_rt_shader_args(options.gfx_level, &out_args);
+   radv_declare_rt_shader_args(options.info->gfx_level, &out_args);
    info.user_sgprs_locs = in_args.user_sgprs_locs;
 
 #ifdef LLVM_AVAILABLE
index b5da8be..f8cb846 100644 (file)
@@ -133,14 +133,9 @@ struct radv_nir_compiler_options {
    bool record_ir;
    bool record_stats;
    bool check_ir;
-   bool has_ls_vgpr_init_bug;
    uint8_t enable_mrt_output_nan_fixup;
    bool wgp_mode;
-   enum radeon_family family;
-   enum amd_gfx_level gfx_level;
-   uint32_t address32_hi;
-   bool has_3d_cube_border_color_mipmap;
-   bool conformant_trunc_coord;
+   const struct radeon_info *info;
 
    struct {
       void (*func)(void *private_data, enum aco_compiler_debug_level level, const char *message);
index 1c5a11d..7521063 100644 (file)
@@ -116,8 +116,7 @@ void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscre
    ctx->screen = sscreen;
    ctx->compiler = compiler;
 
-   ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.gfx_level, sscreen->info.family,
-                        sscreen->info.has_3d_cube_border_color_mipmap, float_mode,
+   ac_llvm_context_init(&ctx->ac, compiler, &sscreen->info, float_mode,
                         wave_size, 64, exports_color_null, exports_mrtz);
 }
 
@@ -955,7 +954,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
    ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero ||
                                 info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO;
    ctx->abi.disable_aniso_single_level = true;
-   ctx->abi.conformant_trunc_coord = ctx->screen->info.conformant_trunc_coord;
 
    bool ls_need_output =
       ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.as_ls &&