From: Marek Olšák Date: Sat, 8 May 2021 04:34:05 +0000 (-0400) Subject: ac/llvm: set target features per function instead of per target machine X-Git-Tag: upstream/21.2.3~2983 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=94a1f45e15a5186bfd3aa3bc4089ad7a0902949f;p=platform%2Fupstream%2Fmesa.git ac/llvm: set target features per function instead of per target machine This is a cleanup that allows the removal of the wave32 target machine and the wave32 pass manager. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 869530c..a65da8e 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -69,8 +69,7 @@ void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler * ctx->wave_size = wave_size; ctx->ballot_mask_bits = ballot_mask_bits; ctx->float_mode = float_mode; - ctx->module = - ac_create_module(wave_size == 32 ? compiler->tm_wave32 : compiler->tm, ctx->context); + ctx->module = ac_create_module(compiler->tm, ctx->context); ctx->builder = ac_create_builder(ctx->context, float_mode); ctx->voidt = LLVMVoidTypeInContext(ctx->context); diff --git a/src/amd/llvm/ac_llvm_util.c b/src/amd/llvm/ac_llvm_util.c index 8ba7435..e285c30 100644 --- a/src/amd/llvm/ac_llvm_util.c +++ b/src/amd/llvm/ac_llvm_util.c @@ -188,18 +188,11 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, const char **out_triple) { assert(family >= CHIP_TAHITI); - char features[256]; const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--"; LLVMTargetRef target = ac_get_llvm_target(triple); - snprintf(features, sizeof(features), "+DumpCode%s%s", - family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32) - ? ",+wavefrontsize64,-wavefrontsize32" - : "", - tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : ""); - LLVMTargetMachineRef tm = - LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), features, level, + LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), "", level, LLVMRelocDefault, LLVMCodeModelDefault); if (out_triple) @@ -317,6 +310,20 @@ void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size) LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str); } +void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx) +{ + char features[2048]; + + snprintf(features, sizeof(features), "+DumpCode%s%s", + /* GFX9 has broken VGPR indexing, so always promote alloca to scratch. */ + ctx->chip_class == GFX9 ? ",-promote-alloca" : "", + /* Wave32 is the default. */ + ctx->chip_class >= GFX10 && ctx->wave_size == 64 ? + ",+wavefrontsize64,-wavefrontsize32" : ""); + + LLVMAddTargetDependentFunctionAttr(F, "target-features", features); +} + unsigned ac_count_scratch_private_memory(LLVMValueRef function) { unsigned private_mem_vgprs = 0; @@ -362,14 +369,6 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family goto fail; } - if (family >= CHIP_NAVI10) { - assert(!(tm_options & AC_TM_CREATE_LOW_OPT)); - compiler->tm_wave32 = - ac_create_target_machine(family, tm_options | AC_TM_WAVE32, LLVMCodeGenLevelDefault, NULL); - if (!compiler->tm_wave32) - goto fail; - } - compiler->target_library_info = ac_create_target_library_info(triple); if (!compiler->target_library_info) goto fail; @@ -388,7 +387,6 @@ fail: void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler) { ac_destroy_llvm_passes(compiler->passes); - ac_destroy_llvm_passes(compiler->passes_wave32); ac_destroy_llvm_passes(compiler->low_opt_passes); if (compiler->passmgr) @@ -399,6 +397,4 @@ void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler) LLVMDisposeTargetMachine(compiler->low_opt_tm); if (compiler->tm) LLVMDisposeTargetMachine(compiler->tm); - if (compiler->tm_wave32) - LLVMDisposeTargetMachine(compiler->tm_wave32); } diff --git a/src/amd/llvm/ac_llvm_util.h b/src/amd/llvm/ac_llvm_util.h index c4e9ec5..455b5ea 100644 --- a/src/amd/llvm/ac_llvm_util.h +++ b/src/amd/llvm/ac_llvm_util.h @@ -61,12 +61,10 @@ enum ac_func_attr enum ac_target_machine_options { - AC_TM_SUPPORTS_SPILL = (1 << 0), - AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3), - AC_TM_CHECK_IR = (1 << 4), - AC_TM_ENABLE_GLOBAL_ISEL = (1 << 5), - AC_TM_CREATE_LOW_OPT = (1 << 6), - AC_TM_WAVE32 = (1 << 7), + AC_TM_SUPPORTS_SPILL = 1 << 0, + AC_TM_CHECK_IR = 1 << 1, + AC_TM_ENABLE_GLOBAL_ISEL = 1 << 2, + AC_TM_CREATE_LOW_OPT = 1 << 3, }; enum ac_float_mode @@ -85,10 +83,6 @@ struct ac_llvm_compiler { LLVMTargetMachineRef tm; struct ac_compiler_passes *passes; - /* Wave32 compiler for GFX10. */ - LLVMTargetMachineRef tm_wave32; - struct ac_compiler_passes *passes_wave32; - /* Optional compiler for faster compilation with fewer optimizations. * LLVM modules can be created with "tm" too. There is no difference. */ @@ -115,6 +109,7 @@ void ac_disable_signed_zeros(struct ac_llvm_context *ctx); void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value); void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size); +void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx); static inline unsigned ac_get_load_intr_attribs(bool can_speculate) { diff --git a/src/amd/vulkan/radv_llvm_helper.cpp b/src/amd/vulkan/radv_llvm_helper.cpp index 9ad2b5a..1e07081 100644 --- a/src/amd/vulkan/radv_llvm_helper.cpp +++ b/src/amd/vulkan/radv_llvm_helper.cpp @@ -47,12 +47,6 @@ class radv_llvm_per_thread_info { if (!passes) return false; - if (llvm_info.tm_wave32) { - passes_wave32 = ac_create_llvm_passes(llvm_info.tm_wave32); - if (!passes_wave32) - return false; - } - return true; } diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 60157b3..904f9f2 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -107,6 +107,7 @@ create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuil } ac_llvm_set_workgroup_size(main_function, max_workgroup_size); + ac_llvm_set_target_features(main_function, ctx); return main_function; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 1e45721..6eb9f02 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -142,7 +142,6 @@ void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compil enum ac_target_machine_options tm_options = (sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) | - (!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) | (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) | (create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0); @@ -150,8 +149,6 @@ void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compil ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options); compiler->passes = ac_create_llvm_passes(compiler->tm); - if (compiler->tm_wave32) - compiler->passes_wave32 = ac_create_llvm_passes(compiler->tm_wave32); if (compiler->low_opt_tm) compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm); } @@ -1266,9 +1263,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->pbb_persistent_states_per_bin <= 32); } - /* LLVM doesn't support VGPR indexing on GFX9. */ - sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class != GFX9; - (void)simple_mtx_init(&sscreen->shader_parts_mutex, mtx_plain); sscreen->use_monolithic_shaders = (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 09bcf4d..f6afb51 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -548,7 +548,6 @@ struct si_screen { bool commutative_blend_add; bool dpbb_allowed; bool dfsm_allowed; - bool llvm_has_working_vgpr_indexing; bool use_ngg; bool use_ngg_culling; bool use_ngg_streamout; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 6d0419b..9ee4779 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -93,9 +93,7 @@ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary, if (!si_replace_shader(count, binary)) { struct ac_compiler_passes *passes = compiler->passes; - if (ac->wave_size == 32) - passes = compiler->passes_wave32; - else if (less_optimized && compiler->low_opt_passes) + if (less_optimized && compiler->low_opt_passes) passes = compiler->low_opt_passes; struct si_llvm_diagnostics diag = {debug}; @@ -190,6 +188,7 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy } ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size); + ac_llvm_set_target_features(ctx->main_fn, &ctx->ac); } void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shader)