From c93bcb32fee552cfad32dcd5e5fa951bd5813558 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Fri, 1 Feb 2019 12:36:56 +0100
Subject: [PATCH] amd: Use inverse ballot intrinsic if available

Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 14 ++++++++++++++
 src/amd/llvm/ac_nir_to_llvm.c                  |  9 +++++++++
 src/amd/vulkan/radv_shader.c                   |  8 +++++++-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 8dda7eb..59ece2d 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -8306,6 +8306,20 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       set_wqm(ctx);
       break;
    }
+   case nir_intrinsic_inverse_ballot: {
+      Temp src = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
+      Temp dst = get_ssa_temp(ctx, &instr->def);
+
+      assert(dst.size() == bld.lm.size());
+      if (src.size() > dst.size()) {
+         emit_extract_vector(ctx, src, 0, dst);
+      } else if (src.size() < dst.size()) {
+         bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src, Operand::zero());
+      } else {
+         bld.copy(Definition(dst), src);
+      }
+      break;
+   }
    case nir_intrinsic_shuffle:
    case nir_intrinsic_read_invocation: {
       Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index dae2473..67bb555 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -3057,6 +3057,15 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
          result = LLVMBuildZExt(ctx->ac.builder, result, dest_type, "");
       }
       break;
+   case nir_intrinsic_inverse_ballot: {
+      LLVMValueRef src = get_src(ctx, instr->src[0]);
+      if (instr->src[0].ssa->bit_size > ctx->ac.wave_size) {
+         LLVMTypeRef src_type = LLVMIntTypeInContext(ctx->ac.context, ctx->ac.wave_size);
+         src = LLVMBuildTrunc(ctx->ac.builder, src, src_type, "");
+      }
+      result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.inverse.ballot", ctx->ac.i1, &src, 1, 0);
+      break;
+   }
    case nir_intrinsic_read_invocation:
       result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]),
                                  get_src(ctx, instr->src[1]));
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index d94935f..eecfe43 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -623,7 +623,13 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
    NIR_PASS(_, nir, nir_lower_global_vars_to_local);
    NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
 
    bool gfx7minus = device->physical_device->rad_info.gfx_level <= GFX7;
+
+   bool has_inverse_ballot = true;
+#if LLVM_AVAILABLE
+   has_inverse_ballot = !radv_use_llvm_for_stage(device, nir->info.stage) || LLVM_VERSION_MAJOR >= 17;
+#endif
+
    NIR_PASS(_, nir, nir_lower_subgroups,
             &(struct nir_lower_subgroups_options){
                .subgroup_size = subgroup_size,
@@ -638,7 +644,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
                .lower_quad_broadcast_dynamic_to_const = gfx7minus,
                .lower_shuffle_to_swizzle_amd = 1,
                .lower_ballot_bit_count_to_mbcnt_amd = 1,
-               .lower_inverse_ballot = 1,
+               .lower_inverse_ballot = !has_inverse_ballot,
             });
 
    NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
-- 
2.7.4
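
Background note (not part of the commit): nir_intrinsic_inverse_ballot is the inverse of ballot() -- each invocation reads its own bit out of a subgroup-uniform ballot mask. A minimal C sketch of the per-invocation semantics; the function name and parameter names here are illustrative, not taken from Mesa:

    #include <stdbool.h>
    #include <stdint.h>

    /* One invocation's view: true iff this lane's bit is set in the
     * subgroup-uniform ballot mask. */
    static bool inverse_ballot(uint64_t ballot, unsigned subgroup_invocation_id)
    {
       return (ballot >> subgroup_invocation_id) & 1u;
    }

Because the mask is uniform across the subgroup and AMD keeps lane masks in scalar registers, ACO can implement the intrinsic as a scalar copy (resizing when the source and the wave's lane mask differ in size), and the LLVM path can emit llvm.amdgcn.inverse.ballot, which the patch only uses with LLVM 17 or newer -- hence the has_inverse_ballot guard in radv_shader.c that otherwise keeps the nir_lower_subgroups lowering.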