From 940da36a65c767ec9fc6817df3d0262b52de42f6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 22 Feb 2017 02:29:12 +0100 Subject: [PATCH] gallivm,ac: add function attributes at call sites instead of declarations They can vary at call sites if the intrinsic is NOT a legacy SI intrinsic. We need this to force readnone or inaccessiblememonly on some amdgcn intrinsics. This is only used with LLVM 4.0 and later. Intrinsics only used with LLVM <= 3.9 don't need the LEGACY flag. gallivm and ac code is in the same patch, because splitting would be more complicated with all the LEGACY uses all over the place. v2: don't change the prototype of lp_add_function_attr. Reviewed-by: Jose Fonseca (v1) --- src/amd/common/ac_llvm_build.c | 23 +++++--- src/amd/common/ac_llvm_util.c | 31 ++++++++--- src/amd/common/ac_llvm_util.h | 17 +++--- src/amd/common/ac_nir_to_llvm.c | 63 +++++++++++++-------- src/gallium/auxiliary/gallivm/lp_bld_intr.c | 68 ++++++++++++++++------- src/gallium/auxiliary/gallivm/lp_bld_intr.h | 11 +++- src/gallium/drivers/radeonsi/si_shader.c | 52 +++++++++-------- src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 14 ++--- 8 files changed, 177 insertions(+), 102 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index a70984e..a0b74a5 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -82,7 +82,9 @@ ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, LLVMTypeRef return_type, LLVMValueRef *params, unsigned param_count, unsigned attrib_mask) { - LLVMValueRef function; + LLVMValueRef function, call; + bool set_callsite_attrs = HAVE_LLVM >= 0x0400 && + !(attrib_mask & AC_FUNC_ATTR_LEGACY); function = LLVMGetNamedFunction(ctx->module, name); if (!function) { @@ -102,13 +104,14 @@ ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name, LLVMSetFunctionCallConv(function, LLVMCCallConv); LLVMSetLinkage(function, LLVMExternalLinkage); - attrib_mask |= AC_FUNC_ATTR_NOUNWIND; - while (attrib_mask) { - enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask); - ac_add_function_attr(function, -1, attr); - } + if (!set_callsite_attrs) + ac_add_func_attributes(ctx->context, function, attrib_mask); } - return LLVMBuildCall(ctx->builder, function, params, param_count, ""); + + call = LLVMBuildCall(ctx->builder, function, params, param_count, ""); + if (set_callsite_attrs) + ac_add_func_attributes(ctx->context, call, attrib_mask); + return call; } LLVMValueRef @@ -530,7 +533,8 @@ ac_build_tbuffer_store(struct ac_llvm_context *ctx, snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]); ac_emit_llvm_intrinsic(ctx, name, ctx->voidt, - args, ARRAY_SIZE(args), 0); + args, ARRAY_SIZE(args), + AC_FUNC_ATTR_LEGACY); } void @@ -842,5 +846,6 @@ LLVMValueRef ac_emit_clamp(struct ac_llvm_context *ctx, LLVMValueRef value) }; return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3, - AC_FUNC_ATTR_READNONE); + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); } diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index be127c5..fb525dd 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -24,7 +24,7 @@ */ /* based on pieces from si_pipe.c and radeon_llvm_emit.c */ #include "ac_llvm_util.h" - +#include "util/bitscan.h" #include #include "c11/threads.h" @@ -180,12 +180,10 @@ static const char *attr_to_str(enum ac_func_attr attr) #endif -void -ac_add_function_attr(LLVMValueRef function, - int attr_idx, - enum ac_func_attr attr) +static void +ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, + int attr_idx, enum ac_func_attr attr) { - #if HAVE_LLVM < 0x0400 LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr); if (attr_idx == -1) { @@ -194,15 +192,30 @@ ac_add_function_attr(LLVMValueRef function, LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr); } #else - LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function)); const char *attr_name = attr_to_str(attr); unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name)); - LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0); - LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); + LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0); + + if (LLVMIsAFunction(function)) + LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); + else + LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr); #endif } +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, + unsigned attrib_mask) +{ + attrib_mask |= AC_FUNC_ATTR_NOUNWIND; + attrib_mask &= ~AC_FUNC_ATTR_LEGACY; + + while (attrib_mask) { + enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask); + ac_add_function_attr(ctx, function, -1, attr); + } +} + void ac_dump_module(LLVMModuleRef module) { diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index 93d3d27..4fe4ab4 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -41,20 +41,21 @@ enum ac_func_attr { AC_FUNC_ATTR_NOUNWIND = (1 << 4), AC_FUNC_ATTR_READNONE = (1 << 5), AC_FUNC_ATTR_READONLY = (1 << 6), + + /* Legacy intrinsic that needs attributes on function declarations + * and they must match the internal LLVM definition exactly, otherwise + * intrinsic selection fails. + */ + AC_FUNC_ATTR_LEGACY = (1u << 31), }; LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill); void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); bool ac_is_sgpr_param(LLVMValueRef param); - -void -ac_add_function_attr(LLVMValueRef function, - int attr_idx, - enum ac_func_attr attr); - -void -ac_dump_module(LLVMModuleRef module); +void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, + unsigned attrib_mask); +void ac_dump_module(LLVMModuleRef module); #ifdef __cplusplus } diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index a3310e1..a800ff0 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1052,12 +1052,13 @@ static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx, } static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx, - const char *intrin, + const char *intrin, unsigned attr_mask, LLVMValueRef srcs[3]) { LLVMValueRef result; LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); - result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE); + result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, + AC_FUNC_ATTR_READNONE | attr_mask); result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, ""); return result; @@ -1431,10 +1432,12 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) to_float_type(ctx, def_type), src[0], src[1], src[2]); break; case nir_op_ibitfield_extract: - result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src); + result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", + AC_FUNC_ATTR_LEGACY, src); break; case nir_op_ubitfield_extract: - result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src); + result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", + AC_FUNC_ATTR_LEGACY, src); break; case nir_op_bitfield_insert: result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]); @@ -1666,8 +1669,9 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */ txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */ size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32, - txq_args, txq_arg_count, - AC_FUNC_ATTR_READNONE); + txq_args, txq_arg_count, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); for (c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, @@ -1691,7 +1695,8 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, tinfo->args[0] = coord; return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND | + AC_FUNC_ATTR_LEGACY); } @@ -1759,7 +1764,8 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, } } return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND | + AC_FUNC_ATTR_LEGACY); } @@ -2037,7 +2043,9 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, offset, "") }; results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32, - params, 2, AC_FUNC_ATTR_READNONE); + params, 2, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); } @@ -2138,7 +2146,9 @@ load_gs_input(struct nir_to_llvm_context *ctx, args[8] = ctx->i32zero; /* TFE */ value[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32", - ctx->i32, args, 9, AC_FUNC_ATTR_READONLY); + ctx->i32, args, 9, + AC_FUNC_ATTR_READONLY | + AC_FUNC_ATTR_LEGACY); } result = ac_build_gather_values(&ctx->ac, value, instr->num_components); @@ -2833,7 +2843,9 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx, params[9] = ctx->i32zero; res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32, - params, 10, AC_FUNC_ATTR_READNONE); + params, 10, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && glsl_sampler_type_is_array(type)) { @@ -2877,7 +2889,7 @@ static void emit_discard_if(struct nir_to_llvm_context *ctx, ctx->f32zero, ""); ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill", ctx->voidt, - &cond, 1, 0); + &cond, 1, AC_FUNC_ATTR_LEGACY); } static LLVMValueRef @@ -3134,7 +3146,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx, LLVMConstReal(ctx->f32, 1.0f), LLVMConstReal(ctx->f32, -1.0f), ""); ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill", - ctx->voidt, &kill, 1, 0); + ctx->voidt, &kill, 1, AC_FUNC_ATTR_LEGACY); /* loop num outputs */ idx = 0; @@ -3324,7 +3336,7 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, ctx->shader_info->fs.can_discard = true; ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp", ctx->voidt, - NULL, 0, 0); + NULL, 0, AC_FUNC_ATTR_LEGACY); break; case nir_intrinsic_discard_if: emit_discard_if(ctx, instr); @@ -4064,7 +4076,8 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx, args[2] = buffer_index; input = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.vs.load.input", ctx->v4f32, args, 3, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND); + AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND | + AC_FUNC_ATTR_LEGACY); for (unsigned chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false); @@ -4415,8 +4428,9 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, LLVMValueRef packed; packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16", - ctx->i32, pack_args, 2, - AC_FUNC_ATTR_READNONE); + ctx->i32, pack_args, 2, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_LEGACY); args[chan + 5] = packed; } break; @@ -4601,7 +4615,8 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", ctx->voidt, - args, 9, 0); + args, 9, + AC_FUNC_ATTR_LEGACY); } } @@ -4656,7 +4671,8 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx) ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", ctx->voidt, - pos_args[i], 9, 0); + pos_args[i], 9, + AC_FUNC_ATTR_LEGACY); } ctx->shader_info->vs.pos_exports = num_pos_exports; @@ -4720,7 +4736,8 @@ si_export_mrt_color(struct nir_to_llvm_context *ctx, return; /* unnecessary NULL export */ ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", - ctx->voidt, args, 9, 0); + ctx->voidt, args, 9, + AC_FUNC_ATTR_LEGACY); } static void @@ -4764,7 +4781,8 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx, args[0] = LLVMConstInt(ctx->i32, mask, false); ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export", - ctx->voidt, args, 9, 0); + ctx->voidt, args, 9, + AC_FUNC_ATTR_LEGACY); } static void @@ -5219,7 +5237,8 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx) value = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32", ctx->i32, args, 9, - AC_FUNC_ATTR_READONLY); + AC_FUNC_ATTR_READONLY | + AC_FUNC_ATTR_LEGACY); LLVMBuildStore(ctx->builder, to_float(ctx, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c index 049671a..0b25ae5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c @@ -159,28 +159,56 @@ static const char *attr_to_str(enum lp_func_attr attr) #endif void -lp_add_function_attr(LLVMValueRef function, - int attr_idx, - enum lp_func_attr attr) +lp_add_function_attr(LLVMValueRef function_or_call, + int attr_idx, enum lp_func_attr attr) { #if HAVE_LLVM < 0x0400 LLVMAttribute llvm_attr = lp_attr_to_llvm_attr(attr); if (attr_idx == -1) { - LLVMAddFunctionAttr(function, llvm_attr); + LLVMAddFunctionAttr(function_or_call, llvm_attr); } else { - LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr); + LLVMAddAttribute(LLVMGetParam(function_or_call, attr_idx - 1), llvm_attr); } #else - LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function)); + + LLVMModuleRef module; + if (LLVMIsAFunction(function_or_call)) { + module = LLVMGetGlobalParent(function_or_call); + } else { + LLVMBasicBlockRef bb = LLVMGetInstructionParent(function_or_call); + LLVMValueRef function = LLVMGetBasicBlockParent(bb); + module = LLVMGetGlobalParent(function); + } + LLVMContextRef ctx = LLVMGetModuleContext(module); + const char *attr_name = attr_to_str(attr); unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name)); - LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0); - LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); + LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0); + + if (LLVMIsAFunction(function_or_call)) + LLVMAddAttributeAtIndex(function_or_call, attr_idx, llvm_attr); + else + LLVMAddCallSiteAttribute(function_or_call, attr_idx, llvm_attr); #endif } +static void +lp_add_func_attributes(LLVMValueRef function, unsigned attrib_mask) +{ + /* NoUnwind indicates that the intrinsic never raises a C++ exception. + * Set it for all intrinsics. + */ + attrib_mask |= LP_FUNC_ATTR_NOUNWIND; + attrib_mask &= ~LP_FUNC_ATTR_LEGACY; + + while (attrib_mask) { + enum lp_func_attr attr = 1u << u_bit_scan(&attrib_mask); + lp_add_function_attr(function, -1, attr); + } +} + LLVMValueRef lp_build_intrinsic(LLVMBuilderRef builder, const char *name, @@ -190,7 +218,9 @@ lp_build_intrinsic(LLVMBuilderRef builder, unsigned attr_mask) { LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); - LLVMValueRef function; + LLVMValueRef function, call; + bool set_callsite_attrs = HAVE_LLVM >= 0x0400 && + !(attr_mask & LP_FUNC_ATTR_LEGACY); function = LLVMGetNamedFunction(module, name); if(!function) { @@ -206,22 +236,18 @@ lp_build_intrinsic(LLVMBuilderRef builder, function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args); - /* NoUnwind indicates that the intrinsic never raises a C++ exception. - * Set it for all intrinsics. - */ - attr_mask |= LP_FUNC_ATTR_NOUNWIND; - - while (attr_mask) { - enum lp_func_attr attr = 1 << u_bit_scan(&attr_mask); - lp_add_function_attr(function, -1, attr); - } + if (!set_callsite_attrs) + lp_add_func_attributes(function, attr_mask); if (gallivm_debug & GALLIVM_DEBUG_IR) { lp_debug_dump_value(function); } } - return LLVMBuildCall(builder, function, args, num_args, ""); + call = LLVMBuildCall(builder, function, args, num_args, ""); + if (set_callsite_attrs) + lp_add_func_attributes(call, attr_mask); + return call; } @@ -309,9 +335,9 @@ lp_build_intrinsic_binary_anylength(struct gallivm_state *gallivm, unsigned num_vec = src_type.length / intrin_length; LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH]; - /* don't support arbitrary size here as this is so yuck */ + /* don't support arbitrary size here as this is so yuck */ if (src_type.length % intrin_length) { - /* FIXME: This is something which should be supported + /* FIXME: This is something which should be supported * but there doesn't seem to be any need for it currently * so crash and burn. */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h index 039e9ab..0032df5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h @@ -54,6 +54,12 @@ enum lp_func_attr { LP_FUNC_ATTR_NOUNWIND = (1 << 4), LP_FUNC_ATTR_READNONE = (1 << 5), LP_FUNC_ATTR_READONLY = (1 << 6), + + /* Legacy intrinsic that needs attributes on function declarations + * and they must match the internal LLVM definition exactly, otherwise + * intrinsic selection fails. + */ + LP_FUNC_ATTR_LEGACY = (1u << 31), }; void @@ -70,9 +76,8 @@ lp_declare_intrinsic(LLVMModuleRef module, unsigned num_args); void -lp_add_function_attr(LLVMValueRef function, - int attr_idx, - enum lp_func_attr attr); +lp_add_function_attr(LLVMValueRef function_or_call, + int attr_idx, enum lp_func_attr attr); LLVMValueRef lp_build_intrinsic(LLVMBuilderRef builder, diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 8e51ae8..212a9be 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -401,7 +401,8 @@ static void declare_input_vs( input[i] = lp_build_intrinsic(gallivm->builder, "llvm.SI.vs.load.input", ctx->v4f32, args, 3, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | + LP_FUNC_ATTR_LEGACY); } /* Break up the vec4 into individual components */ @@ -1123,14 +1124,16 @@ static LLVMValueRef fetch_input_gs( value = lp_build_intrinsic(gallivm->builder, "llvm.SI.buffer.load.dword.i32.i32", ctx->i32, args, 9, - LP_FUNC_ATTR_READONLY); + LP_FUNC_ATTR_READONLY | + LP_FUNC_ATTR_LEGACY); if (tgsi_type_is_64bit(type)) { LLVMValueRef value2; args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256); value2 = lp_build_intrinsic(gallivm->builder, "llvm.SI.buffer.load.dword.i32.i32", ctx->i32, args, 9, - LP_FUNC_ATTR_READONLY); + LP_FUNC_ATTR_READONLY | + LP_FUNC_ATTR_LEGACY); return si_llvm_emit_fetch_64bit(bld_base, type, value, value2); } @@ -1368,7 +1371,8 @@ static LLVMValueRef buffer_load_const(struct si_shader_context *ctx, LLVMValueRef args[2] = {resource, offset}; return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, args, 2, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | + LP_FUNC_ATTR_LEGACY); } static LLVMValueRef load_sample_position(struct si_shader_context *radeon_bld, LLVMValueRef sample_id) @@ -1822,7 +1826,8 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, packed = lp_build_intrinsic(base->gallivm->builder, "llvm.SI.packf16", ctx->i32, pack_args, 2, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | + LP_FUNC_ATTR_LEGACY); args[chan + 5] = LLVMBuildBitCast(base->gallivm->builder, packed, ctx->f32, ""); @@ -1954,10 +1959,10 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, lp_build_const_float(gallivm, -1.0f)); lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill", - ctx->voidt, &arg, 1, 0); + ctx->voidt, &arg, 1, LP_FUNC_ATTR_LEGACY); } else { lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kilp", - ctx->voidt, NULL, 0, 0); + ctx->voidt, NULL, 0, LP_FUNC_ATTR_LEGACY); } } @@ -2302,7 +2307,7 @@ handle_semantic: } else { lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", ctx->voidt, - args, 9, 0); + args, 9, LP_FUNC_ATTR_LEGACY); } if (semantic_name == TGSI_SEMANTIC_CLIPDIST) { @@ -2388,7 +2393,8 @@ handle_semantic: pos_args[i][2] = uint->one; lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - ctx->voidt, pos_args[i], 9, 0); + ctx->voidt, pos_args[i], 9, + LP_FUNC_ATTR_LEGACY); } } @@ -2979,7 +2985,7 @@ static void si_emit_ps_exports(struct si_shader_context *ctx, for (unsigned i = 0; i < exp->num; i++) lp_build_intrinsic(ctx->gallivm.builder, "llvm.SI.export", ctx->voidt, - exp->args[i], 9, 0); + exp->args[i], 9, LP_FUNC_ATTR_LEGACY); } static void si_export_null(struct lp_build_tgsi_context *bld_base) @@ -3000,7 +3006,7 @@ static void si_export_null(struct lp_build_tgsi_context *bld_base) args[8] = base->undef; /* A */ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", - ctx->voidt, args, 9, 0); + ctx->voidt, args, 9, LP_FUNC_ATTR_LEGACY); } /** @@ -4089,7 +4095,7 @@ static void resq_emit( out = lp_build_intrinsic( builder, "llvm.SI.getresinfo.i32", emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); /* Divide the number of layers by 6 to get the number of cubes. */ if (inst->Memory.Texture == TGSI_TEXTURE_CUBE_ARRAY) { @@ -4326,7 +4332,7 @@ static void txq_emit(const struct lp_build_tgsi_action *action, emit_data->output[emit_data->chan] = lp_build_intrinsic( base->gallivm->builder, "llvm.SI.getresinfo.i32", emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); /* Divide the number of layers by 6 to get the number of cubes. */ if (target == TGSI_TEXTURE_CUBE_ARRAY || @@ -4735,7 +4741,7 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx, emit_data->output[emit_data->chan] = lp_build_intrinsic(builder, intr_name, emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); } static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, @@ -4759,7 +4765,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, base->gallivm->builder, "llvm.SI.vs.load.input", emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); return; } @@ -4836,7 +4842,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, emit_data->output[emit_data->chan] = lp_build_intrinsic( base->gallivm->builder, intr_name, emit_data->dst_type, emit_data->args, emit_data->arg_count, - LP_FUNC_ATTR_READNONE); + LP_FUNC_ATTR_READNONE | LP_FUNC_ATTR_LEGACY); } static void si_llvm_emit_txqs( @@ -5125,7 +5131,7 @@ static void si_llvm_emit_vertex( lp_build_const_float(gallivm, -1.0f)); lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill", - ctx->voidt, &kill, 1, 0); + ctx->voidt, &kill, 1, LP_FUNC_ATTR_LEGACY); } else { lp_build_if(&if_state, gallivm, can_emit); } @@ -5747,7 +5753,8 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx, /* The intrinsic kills the thread if arg < 0. */ bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0), LLVMConstReal(ctx->f32, -1), ""); - lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, 0); + lp_build_intrinsic(builder, "llvm.AMDGPU.kill", ctx->voidt, &bit, 1, + LP_FUNC_ATTR_LEGACY); } void si_shader_binary_read_config(struct ac_shader_binary *binary, @@ -6315,9 +6322,10 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, outputs[i].values[chan] = LLVMBuildBitCast(gallivm->builder, lp_build_intrinsic(gallivm->builder, - "llvm.SI.buffer.load.dword.i32.i32", - ctx.i32, args, 9, - LP_FUNC_ATTR_READONLY), + "llvm.SI.buffer.load.dword.i32.i32", + ctx.i32, args, 9, + LP_FUNC_ATTR_READONLY | + LP_FUNC_ATTR_LEGACY), ctx.f32, ""); } } @@ -7686,7 +7694,7 @@ static void si_build_vs_epilog_function(struct si_shader_context *ctx, lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", LLVMVoidTypeInContext(base->gallivm->context), - args, 9, 0); + args, 9, LP_FUNC_ATTR_LEGACY); } LLVMBuildRetVoid(gallivm->builder); diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index 10268e9..ee59fed 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -58,13 +58,9 @@ static void kil_emit(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { - unsigned i; - for (i = 0; i < emit_data->arg_count; i++) { - emit_data->output[i] = lp_build_intrinsic_unary( - bld_base->base.gallivm->builder, - action->intr_name, - emit_data->dst_type, emit_data->args[i]); - } + lp_build_intrinsic(bld_base->base.gallivm->builder, + action->intr_name, emit_data->dst_type, + &emit_data->args[0], 1, LP_FUNC_ATTR_LEGACY); } static void emit_icmp(const struct lp_build_tgsi_action *action, @@ -507,7 +503,9 @@ static void emit_bfe(const struct lp_build_tgsi_action *action, bfe_sm5 = lp_build_intrinsic(builder, action->intr_name, emit_data->dst_type, emit_data->args, - emit_data->arg_count, LP_FUNC_ATTR_READNONE); + emit_data->arg_count, + LP_FUNC_ATTR_READNONE | + LP_FUNC_ATTR_LEGACY); /* Correct for GLSL semantics. */ cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2], -- 2.7.4