ac/llvm: set target features per function instead of per target machine

author Marek Olšák <marek.olsak@amd.com>

Sat, 8 May 2021 04:34:05 +0000 (00:34 -0400)

committer Marge Bot <eric+marge@anholt.net>

Tue, 25 May 2021 16:15:44 +0000 (16:15 +0000)
author Marek Olšák <marek.olsak@amd.com>
Sat, 8 May 2021 04:34:05 +0000 (00:34 -0400)
committer Marge Bot <eric+marge@anholt.net>
Tue, 25 May 2021 16:15:44 +0000 (16:15 +0000)
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c

index 869530c..a65da8e 100644 (file)
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -69,8 +69,7 @@ void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *
     ctx->wave_size = wave_size;
     ctx->ballot_mask_bits = ballot_mask_bits;
     ctx->float_mode = float_mode;
-   ctx->module =
-      ac_create_module(wave_size == 32 ? compiler->tm_wave32 : compiler->tm, ctx->context);
+   ctx->module = ac_create_module(compiler->tm, ctx->context);
     ctx->builder = ac_create_builder(ctx->context, float_mode);
  
     ctx->voidt = LLVMVoidTypeInContext(ctx->context);
diff --git a/src/amd/llvm/ac_llvm_util.c b/src/amd/llvm/ac_llvm_util.c

index 8ba7435..e285c30 100644 (file)
--- a/src/amd/llvm/ac_llvm_util.c
+++ b/src/amd/llvm/ac_llvm_util.c
@@ -188,18 +188,11 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
                                                       const char **out_triple)
  {
     assert(family >= CHIP_TAHITI);
-   char features[256];
     const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
     LLVMTargetRef target = ac_get_llvm_target(triple);
  
-   snprintf(features, sizeof(features), "+DumpCode%s%s",
-            family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32)
-               ? ",+wavefrontsize64,-wavefrontsize32"
-               : "",
-            tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
-
     LLVMTargetMachineRef tm =
-      LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), features, level,
+      LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), "", level,
                                LLVMRelocDefault, LLVMCodeModelDefault);
  
     if (out_triple)
@@ -317,6 +310,20 @@ void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
     LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
  }
  
+void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx)
+{
+   char features[2048];
+
+   snprintf(features, sizeof(features), "+DumpCode%s%s",
+            /* GFX9 has broken VGPR indexing, so always promote alloca to scratch. */
+            ctx->chip_class == GFX9 ? ",-promote-alloca" : "",
+            /* Wave32 is the default. */
+            ctx->chip_class >= GFX10 && ctx->wave_size == 64 ?
+               ",+wavefrontsize64,-wavefrontsize32" : "");
+
+   LLVMAddTargetDependentFunctionAttr(F, "target-features", features);
+}
+
  unsigned ac_count_scratch_private_memory(LLVMValueRef function)
  {
     unsigned private_mem_vgprs = 0;
@@ -362,14 +369,6 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family
           goto fail;
     }
  
-   if (family >= CHIP_NAVI10) {
-      assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
-      compiler->tm_wave32 =
-         ac_create_target_machine(family, tm_options | AC_TM_WAVE32, LLVMCodeGenLevelDefault, NULL);
-      if (!compiler->tm_wave32)
-         goto fail;
-   }
-
     compiler->target_library_info = ac_create_target_library_info(triple);
     if (!compiler->target_library_info)
        goto fail;
@@ -388,7 +387,6 @@ fail:
  void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
  {
     ac_destroy_llvm_passes(compiler->passes);
-   ac_destroy_llvm_passes(compiler->passes_wave32);
     ac_destroy_llvm_passes(compiler->low_opt_passes);
  
     if (compiler->passmgr)
@@ -399,6 +397,4 @@ void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
        LLVMDisposeTargetMachine(compiler->low_opt_tm);
     if (compiler->tm)
        LLVMDisposeTargetMachine(compiler->tm);
-   if (compiler->tm_wave32)
-      LLVMDisposeTargetMachine(compiler->tm_wave32);
  }
diff --git a/src/amd/llvm/ac_llvm_util.h b/src/amd/llvm/ac_llvm_util.h

index c4e9ec5..455b5ea 100644 (file)
--- a/src/amd/llvm/ac_llvm_util.h
+++ b/src/amd/llvm/ac_llvm_util.h
@@ -61,12 +61,10 @@ enum ac_func_attr
  
  enum ac_target_machine_options
  {
-   AC_TM_SUPPORTS_SPILL = (1 << 0),
-   AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3),
-   AC_TM_CHECK_IR = (1 << 4),
-   AC_TM_ENABLE_GLOBAL_ISEL = (1 << 5),
-   AC_TM_CREATE_LOW_OPT = (1 << 6),
-   AC_TM_WAVE32 = (1 << 7),
+   AC_TM_SUPPORTS_SPILL       = 1 << 0,
+   AC_TM_CHECK_IR             = 1 << 1,
+   AC_TM_ENABLE_GLOBAL_ISEL   = 1 << 2,
+   AC_TM_CREATE_LOW_OPT       = 1 << 3,
  };
  
  enum ac_float_mode
@@ -85,10 +83,6 @@ struct ac_llvm_compiler {
     LLVMTargetMachineRef tm;
     struct ac_compiler_passes *passes;
  
-   /* Wave32 compiler for GFX10. */
-   LLVMTargetMachineRef tm_wave32;
-   struct ac_compiler_passes *passes_wave32;
-
     /* Optional compiler for faster compilation with fewer optimizations.
      * LLVM modules can be created with "tm" too. There is no difference.
      */
@@ -115,6 +109,7 @@ void ac_disable_signed_zeros(struct ac_llvm_context *ctx);
  
  void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value);
  void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
+void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx);
  
  static inline unsigned ac_get_load_intr_attribs(bool can_speculate)
  {
diff --git a/src/amd/vulkan/radv_llvm_helper.cpp b/src/amd/vulkan/radv_llvm_helper.cpp

index 9ad2b5a..1e07081 100644 (file)
--- a/src/amd/vulkan/radv_llvm_helper.cpp
+++ b/src/amd/vulkan/radv_llvm_helper.cpp
@@ -47,12 +47,6 @@ class radv_llvm_per_thread_info {
        if (!passes)
           return false;
  
-      if (llvm_info.tm_wave32) {
-         passes_wave32 = ac_create_llvm_passes(llvm_info.tm_wave32);
-         if (!passes_wave32)
-            return false;
-      }
-
        return true;
     }
  
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c

index 60157b3..904f9f2 100644 (file)
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -107,6 +107,7 @@ create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuil
     }
  
     ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
+   ac_llvm_set_target_features(main_function, ctx);
  
     return main_function;
  }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c

index 1e45721..6eb9f02 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -142,7 +142,6 @@ void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compil
  
     enum ac_target_machine_options tm_options =
        (sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) |
-      (!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) |
        (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) |
        (create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0);
  
@@ -150,8 +149,6 @@ void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compil
     ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options);
     compiler->passes = ac_create_llvm_passes(compiler->tm);
  
-   if (compiler->tm_wave32)
-      compiler->passes_wave32 = ac_create_llvm_passes(compiler->tm_wave32);
     if (compiler->low_opt_tm)
        compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm);
  }
@@ -1266,9 +1263,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
               sscreen->pbb_persistent_states_per_bin <= 32);
     }
  
-   /* LLVM doesn't support VGPR indexing on GFX9. */
-   sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class != GFX9;
-
     (void)simple_mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
     sscreen->use_monolithic_shaders = (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
  
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h

index 09bcf4d..f6afb51 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -548,7 +548,6 @@ struct si_screen {
     bool commutative_blend_add;
     bool dpbb_allowed;
     bool dfsm_allowed;
-   bool llvm_has_working_vgpr_indexing;
     bool use_ngg;
     bool use_ngg_culling;
     bool use_ngg_streamout;
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c

index 6d0419b..9ee4779 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -93,9 +93,7 @@ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
     if (!si_replace_shader(count, binary)) {
        struct ac_compiler_passes *passes = compiler->passes;
  
-      if (ac->wave_size == 32)
-         passes = compiler->passes_wave32;
-      else if (less_optimized && compiler->low_opt_passes)
+      if (less_optimized && compiler->low_opt_passes)
           passes = compiler->low_opt_passes;
  
        struct si_llvm_diagnostics diag = {debug};
@@ -190,6 +188,7 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy
     }
  
     ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
+   ac_llvm_set_target_features(ctx->main_fn, &ctx->ac);
  }
  
  void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shader)
author	Marek Olšák <marek.olsak@amd.com>
	Sat, 8 May 2021 04:34:05 +0000 (00:34 -0400)
committer	Marge Bot <eric+marge@anholt.net>
	Tue, 25 May 2021 16:15:44 +0000 (16:15 +0000)
src/amd/llvm/ac_llvm_build.c		patch \| blob \| history
src/amd/llvm/ac_llvm_util.c		patch \| blob \| history
src/amd/llvm/ac_llvm_util.h		patch \| blob \| history
src/amd/vulkan/radv_llvm_helper.cpp		patch \| blob \| history
src/amd/vulkan/radv_nir_to_llvm.c		patch \| blob \| history
src/gallium/drivers/radeonsi/si_pipe.c		patch \| blob \| history
src/gallium/drivers/radeonsi/si_pipe.h		patch \| blob \| history
src/gallium/drivers/radeonsi/si_shader_llvm.c		patch \| blob \| history