ctx->wave_size = wave_size;
ctx->ballot_mask_bits = ballot_mask_bits;
ctx->float_mode = float_mode;
- ctx->module =
- ac_create_module(wave_size == 32 ? compiler->tm_wave32 : compiler->tm, ctx->context);
+ ctx->module = ac_create_module(compiler->tm, ctx->context);
ctx->builder = ac_create_builder(ctx->context, float_mode);
ctx->voidt = LLVMVoidTypeInContext(ctx->context);
const char **out_triple)
{
assert(family >= CHIP_TAHITI);
- char features[256];
const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
LLVMTargetRef target = ac_get_llvm_target(triple);
- snprintf(features, sizeof(features), "+DumpCode%s%s",
- family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32)
- ? ",+wavefrontsize64,-wavefrontsize32"
- : "",
- tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
-
LLVMTargetMachineRef tm =
- LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), features, level,
+ LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), "", level,
LLVMRelocDefault, LLVMCodeModelDefault);
if (out_triple)
LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
}
+void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx)
+{
+ char features[2048];
+
+ snprintf(features, sizeof(features), "+DumpCode%s%s",
+ /* GFX9 has broken VGPR indexing, so always promote alloca to scratch. */
+ ctx->chip_class == GFX9 ? ",-promote-alloca" : "",
+ /* Wave32 is the default. */
+ ctx->chip_class >= GFX10 && ctx->wave_size == 64 ?
+ ",+wavefrontsize64,-wavefrontsize32" : "");
+
+ LLVMAddTargetDependentFunctionAttr(F, "target-features", features);
+}
+
unsigned ac_count_scratch_private_memory(LLVMValueRef function)
{
unsigned private_mem_vgprs = 0;
goto fail;
}
- if (family >= CHIP_NAVI10) {
- assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
- compiler->tm_wave32 =
- ac_create_target_machine(family, tm_options | AC_TM_WAVE32, LLVMCodeGenLevelDefault, NULL);
- if (!compiler->tm_wave32)
- goto fail;
- }
-
compiler->target_library_info = ac_create_target_library_info(triple);
if (!compiler->target_library_info)
goto fail;
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
{
ac_destroy_llvm_passes(compiler->passes);
- ac_destroy_llvm_passes(compiler->passes_wave32);
ac_destroy_llvm_passes(compiler->low_opt_passes);
if (compiler->passmgr)
LLVMDisposeTargetMachine(compiler->low_opt_tm);
if (compiler->tm)
LLVMDisposeTargetMachine(compiler->tm);
- if (compiler->tm_wave32)
- LLVMDisposeTargetMachine(compiler->tm_wave32);
}
enum ac_target_machine_options
{
- AC_TM_SUPPORTS_SPILL = (1 << 0),
- AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3),
- AC_TM_CHECK_IR = (1 << 4),
- AC_TM_ENABLE_GLOBAL_ISEL = (1 << 5),
- AC_TM_CREATE_LOW_OPT = (1 << 6),
- AC_TM_WAVE32 = (1 << 7),
+ AC_TM_SUPPORTS_SPILL = 1 << 0,
+ AC_TM_CHECK_IR = 1 << 1,
+ AC_TM_ENABLE_GLOBAL_ISEL = 1 << 2,
+ AC_TM_CREATE_LOW_OPT = 1 << 3,
};
enum ac_float_mode
LLVMTargetMachineRef tm;
struct ac_compiler_passes *passes;
- /* Wave32 compiler for GFX10. */
- LLVMTargetMachineRef tm_wave32;
- struct ac_compiler_passes *passes_wave32;
-
/* Optional compiler for faster compilation with fewer optimizations.
* LLVM modules can be created with "tm" too. There is no difference.
*/
void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value);
void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
+void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx);
static inline unsigned ac_get_load_intr_attribs(bool can_speculate)
{
if (!passes)
return false;
- if (llvm_info.tm_wave32) {
- passes_wave32 = ac_create_llvm_passes(llvm_info.tm_wave32);
- if (!passes_wave32)
- return false;
- }
-
return true;
}
}
ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
+ ac_llvm_set_target_features(main_function, ctx);
return main_function;
}
enum ac_target_machine_options tm_options =
(sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) |
- (!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) |
(sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) |
(create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0);
ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options);
compiler->passes = ac_create_llvm_passes(compiler->tm);
- if (compiler->tm_wave32)
- compiler->passes_wave32 = ac_create_llvm_passes(compiler->tm_wave32);
if (compiler->low_opt_tm)
compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm);
}
sscreen->pbb_persistent_states_per_bin <= 32);
}
- /* LLVM doesn't support VGPR indexing on GFX9. */
- sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class != GFX9;
-
(void)simple_mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
sscreen->use_monolithic_shaders = (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
bool commutative_blend_add;
bool dpbb_allowed;
bool dfsm_allowed;
- bool llvm_has_working_vgpr_indexing;
bool use_ngg;
bool use_ngg_culling;
bool use_ngg_streamout;
if (!si_replace_shader(count, binary)) {
struct ac_compiler_passes *passes = compiler->passes;
- if (ac->wave_size == 32)
- passes = compiler->passes_wave32;
- else if (less_optimized && compiler->low_opt_passes)
+ if (less_optimized && compiler->low_opt_passes)
passes = compiler->low_opt_passes;
struct si_llvm_diagnostics diag = {debug};
}
ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
+ ac_llvm_set_target_features(ctx->main_fn, &ctx->ac);
}
void si_llvm_create_main_func(struct si_shader_context *ctx, bool ngg_cull_shader)