From 12a2374da380a9a28cacf968c33b93ba320b0407 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Tue, 15 May 2012 18:48:16 +0400 Subject: [PATCH] radeon/llvm: use IntrNoMem property for intrinsics where possible Signed-off-by: Vadim Girlin Reviewed-by: Tom Stellard --- src/gallium/drivers/r600/r600_llvm.c | 23 ++-- src/gallium/drivers/radeon/AMDGPUIntrinsics.td | 76 +++++------ src/gallium/drivers/radeon/AMDILFormats.td | 18 +-- .../drivers/radeon/R600IntrinsicsNoOpenCL.td | 2 +- src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td | 2 +- src/gallium/drivers/radeon/radeon_llvm.h | 16 +++ .../drivers/radeon/radeon_setup_tgsi_llvm.c | 139 ++++++++++++++------- 7 files changed, 171 insertions(+), 105 deletions(-) diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index f916604..adcc24f 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -21,10 +21,11 @@ static LLVMValueRef llvm_fetch_const( enum tgsi_opcode_type type, unsigned swizzle) { - LLVMValueRef cval = lp_build_intrinsic_unary(bld_base->base.gallivm->builder, + LLVMValueRef idx = lp_build_const_int32(bld_base->base.gallivm, + radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)); + LLVMValueRef cval = build_intrinsic(bld_base->base.gallivm->builder, "llvm.AMDGPU.load.const", bld_base->base.elem_type, - lp_build_const_int32(bld_base->base.gallivm, - radeon_llvm_reg_index_soa(reg->Register.Index, swizzle))); + &idx, 1, LLVMReadNoneAttribute); return bitcast(bld_base, type, cval); } @@ -44,10 +45,11 @@ static void llvm_load_system_value( LLVMValueRef reg = lp_build_const_int32( ctx->soa.bld_base.base.gallivm, chan); - ctx->system_values[index] = lp_build_intrinsic_unary( + ctx->system_values[index] = build_intrinsic( ctx->soa.bld_base.base.gallivm->builder, "llvm.R600.load.input", - ctx->soa.bld_base.base.elem_type, reg); + ctx->soa.bld_base.base.elem_type, &reg, 1, + LLVMReadNoneAttribute); } static LLVMValueRef 
llvm_fetch_system_value( @@ -76,10 +78,11 @@ static void llvm_load_input( LLVMValueRef reg = lp_build_const_int32( ctx->soa.bld_base.base.gallivm, soa_index + (ctx->reserved_reg_count * 4)); - ctx->inputs[soa_index] = lp_build_intrinsic_unary( + ctx->inputs[soa_index] = build_intrinsic( ctx->soa.bld_base.base.gallivm->builder, "llvm.R600.load.input", - ctx->soa.bld_base.base.elem_type, reg); + ctx->soa.bld_base.base.elem_type, &reg, 1, + LLVMReadNoneAttribute); } } @@ -146,9 +149,9 @@ static void llvm_emit_tex( emit_data->inst->Src[1].Register.Index); args[2] = lp_build_const_int32(gallivm, emit_data->inst->Texture.Texture); - emit_data->output[0] = lp_build_intrinsic(gallivm->builder, + emit_data->output[0] = build_intrinsic(gallivm->builder, action->intr_name, - emit_data->dst_type, args, 3); + emit_data->dst_type, args, 3, LLVMReadNoneAttribute); } static void dp_fetch_args( @@ -189,7 +192,7 @@ static void dp_fetch_args( static struct lp_build_tgsi_action dot_action = { .fetch_args = dp_fetch_args, - .emit = lp_build_tgsi_intrinsic, + .emit = build_tgsi_intrinsic_nomem, .intr_name = "llvm.AMDGPU.dp4" }; diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td index 398fd11..1b6d802 100644 --- a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td +++ b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td @@ -13,47 +13,47 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { - def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>; - def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], []>; - def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], []>; - def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; - def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], []>; + def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>; + def 
int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], []>; - def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - def int_AMDGPU_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; - def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; - def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], []>; - def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; + def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; + def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; def int_AMDGPU_kilp : Intrinsic<[], [], []>; - def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; - def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; - def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; - def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; - def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; - def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; - 
def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; - def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; - def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; - def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; - def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; - def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; - def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; - def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; - def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; - def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; - def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], []>; + def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_pow : 
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, 
llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; } let TargetPrefix = "TGSI", isTarget = 1 in { diff --git a/src/gallium/drivers/radeon/AMDILFormats.td b/src/gallium/drivers/radeon/AMDILFormats.td index 99489e7..309e5e0 100644 --- a/src/gallium/drivers/radeon/AMDILFormats.td +++ b/src/gallium/drivers/radeon/AMDILFormats.td @@ -407,34 +407,34 @@ let TargetPrefix = "AMDIL", isTarget = 1 in { class VoidIntBool : Intrinsic<[llvm_i32_ty], [], []>; class UnaryIntInt : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], []>; + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; class UnaryIntFloat : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], []>; + Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; class ConvertIntFTOI : - Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], []>; + Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>; class ConvertIntITOF : - Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], []>; + Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; class UnaryIntNoRetInt : Intrinsic<[], [llvm_anyint_ty], []>; class UnaryIntNoRetFloat : Intrinsic<[], [llvm_anyfloat_ty], []>; class BinaryIntInt : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], []>; + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class BinaryIntFloat : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], []>; + Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class BinaryIntNoRetInt : Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>; class BinaryIntNoRetFloat : Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], 
[]>; class TernaryIntInt : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, - LLVMMatchType<0>, LLVMMatchType<0>], []>; + LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class TernaryIntFloat : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, - LLVMMatchType<0>, LLVMMatchType<0>], []>; + LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class QuaternaryIntInt : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, - LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], []>; + LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class UnaryAtomicInt : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; class BinaryAtomicInt : diff --git a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td index 73ef4aa..98af358 100644 --- a/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td +++ b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// let TargetPrefix = "R600", isTarget = 1 in { - def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadWriteArgMem]>; + def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; } let TargetPrefix = "r600", isTarget = 1 in { diff --git a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td index cd76135..8efa29b 100644 --- a/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td +++ b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td @@ -12,5 +12,5 @@ //===----------------------------------------------------------------------===// let TargetPrefix = "R600", isTarget = 1 in { - def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadWriteArgMem]>; + def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; } diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index 
4a70639..7a32bb0 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -157,4 +157,20 @@ unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan); void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx); +LLVMValueRef +build_intrinsic(LLVMBuilderRef builder, + const char *name, + LLVMTypeRef ret_type, + LLVMValueRef *args, + unsigned num_args, + LLVMAttribute attr); + +void +build_tgsi_intrinsic_nomem( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data); + + + #endif /* RADEON_LLVM_H */ diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 0689b6a..cbe052d 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -34,8 +34,10 @@ #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" #include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_debug.h" +#include #include static struct radeon_llvm_loop * get_current_loop(struct radeon_llvm_context * ctx) @@ -522,9 +524,9 @@ static void emit_prepare_cube_coords( LLVMValueRef mad_args[3]; unsigned i, cnt; - LLVMValueRef v = lp_build_intrinsic(builder, "llvm.AMDGPU.cube", + LLVMValueRef v = build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4), - &emit_data->args[0],1); + &emit_data->args[0],1, LLVMReadNoneAttribute); /* save src.w for shadow cube */ cnt = shadowcube ? 
3 : 4; @@ -534,21 +536,21 @@ static void emit_prepare_cube_coords( coords[i] = LLVMBuildExtractElement(builder, v, idx, ""); } - coords[2] = lp_build_intrinsic(builder, "llvm.AMDIL.fabs.", - type, &coords[2], 1); - coords[2] = lp_build_intrinsic(builder, "llvm.AMDGPU.rcp", - type, &coords[2], 1); + coords[2] = build_intrinsic(builder, "llvm.AMDIL.fabs.", + type, &coords[2], 1, LLVMReadNoneAttribute); + coords[2] = build_intrinsic(builder, "llvm.AMDGPU.rcp", + type, &coords[2], 1, LLVMReadNoneAttribute); mad_args[1] = coords[2]; mad_args[2] = LLVMConstReal(type, 1.5); mad_args[0] = coords[0]; - coords[0] = lp_build_intrinsic(builder, "llvm.AMDIL.mad.", - type, mad_args, 3); + coords[0] = build_intrinsic(builder, "llvm.AMDIL.mad.", + type, mad_args, 3, LLVMReadNoneAttribute); mad_args[0] = coords[1]; - coords[1] = lp_build_intrinsic(builder, "llvm.AMDIL.mad.", - type, mad_args, 3); + coords[1] = build_intrinsic(builder, "llvm.AMDIL.mad.", + type, mad_args, 3, LLVMReadNoneAttribute); /* apply yxwy swizzle to cooords */ coords[2] = coords[3]; @@ -859,6 +861,51 @@ static void emit_immediate(struct lp_build_tgsi_context * bld_base, ctx->soa.num_immediates++; } +LLVMValueRef +build_intrinsic(LLVMBuilderRef builder, + const char *name, + LLVMTypeRef ret_type, + LLVMValueRef *args, + unsigned num_args, + LLVMAttribute attr) +{ + LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); + LLVMValueRef function; + + function = LLVMGetNamedFunction(module, name); + if(!function) { + LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS]; + unsigned i; + + assert(num_args <= LP_MAX_FUNC_ARGS); + + for(i = 0; i < num_args; ++i) { + assert(args[i]); + arg_types[i] = LLVMTypeOf(args[i]); + } + + function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args); + + if (attr) + LLVMAddFunctionAttr(function, attr); + } + + return LLVMBuildCall(builder, function, args, num_args, ""); +} + +void +build_tgsi_intrinsic_nomem( + const struct 
lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_context * base = &bld_base->base; + emit_data->output[emit_data->chan] = build_intrinsic( + base->gallivm->builder, action->intr_name, + emit_data->dst_type, emit_data->args, + emit_data->arg_count, LLVMReadNoneAttribute); +} + void radeon_llvm_context_init(struct radeon_llvm_context * ctx) { struct lp_type type; @@ -918,7 +965,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) lp_set_default_actions(bld_base); - bld_base->op_actions[TGSI_OPCODE_IABS].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_IABS].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs."; bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not; bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and; @@ -949,91 +996,91 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp; bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp; bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_ROUND].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest."; - bld_base->op_actions[TGSI_OPCODE_MIN].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.AMDIL.min."; - bld_base->op_actions[TGSI_OPCODE_MAX].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.AMDIL.max."; - bld_base->op_actions[TGSI_OPCODE_IMIN].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_IMIN].emit = build_tgsi_intrinsic_nomem; 
bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin"; - bld_base->op_actions[TGSI_OPCODE_IMAX].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_IMAX].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax"; - bld_base->op_actions[TGSI_OPCODE_UMIN].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_UMIN].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_UMIN].intr_name = "llvm.AMDGPU.umin"; - bld_base->op_actions[TGSI_OPCODE_UMAX].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_UMAX].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax"; bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = tex_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf"; bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq"; - bld_base->op_actions[TGSI_OPCODE_CEIL].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.AMDIL.round.posinf."; - bld_base->op_actions[TGSI_OPCODE_ABS].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.AMDIL.fabs."; - bld_base->op_actions[TGSI_OPCODE_ARL].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_ARL].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_ARL].intr_name = "llvm.AMDGPU.arl"; bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit; bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit; - bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem; 
bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp."; - bld_base->op_actions[TGSI_OPCODE_CMP].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt"; - bld_base->op_actions[TGSI_OPCODE_COS].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.AMDGPU.cos"; - bld_base->op_actions[TGSI_OPCODE_DIV].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_DIV].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_DIV].intr_name = "llvm.AMDGPU.div"; bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit; bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; - bld_base->op_actions[TGSI_OPCODE_EX2].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp."; - bld_base->op_actions[TGSI_OPCODE_FLR].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.AMDGPU.floor"; - bld_base->op_actions[TGSI_OPCODE_FRC].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction."; bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit; bld_base->op_actions[TGSI_OPCODE_KIL].emit = kil_emit; bld_base->op_actions[TGSI_OPCODE_KIL].intr_name = "llvm.AMDGPU.kill"; bld_base->op_actions[TGSI_OPCODE_KILP].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_KILP].intr_name = "llvm.AMDGPU.kilp"; - bld_base->op_actions[TGSI_OPCODE_LG2].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_LG2].emit = 
build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.AMDIL.log."; - bld_base->op_actions[TGSI_OPCODE_LRP].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_LRP].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_LRP].intr_name = "llvm.AMDGPU.lrp"; - bld_base->op_actions[TGSI_OPCODE_MIN].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.AMDIL.min."; - bld_base->op_actions[TGSI_OPCODE_MAD].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_MAD].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_MAD].intr_name = "llvm.AMDIL.mad."; - bld_base->op_actions[TGSI_OPCODE_MAX].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.AMDIL.max."; - bld_base->op_actions[TGSI_OPCODE_MUL].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_MUL].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_MUL].intr_name = "llvm.AMDGPU.mul"; - bld_base->op_actions[TGSI_OPCODE_POW].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.AMDGPU.pow"; - bld_base->op_actions[TGSI_OPCODE_RCP].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_RCP].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_RCP].intr_name = "llvm.AMDGPU.rcp"; - bld_base->op_actions[TGSI_OPCODE_SSG].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_SSG].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_SSG].intr_name = "llvm.AMDGPU.ssg"; - bld_base->op_actions[TGSI_OPCODE_SGE].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_SGE].emit = build_tgsi_intrinsic_nomem; 
bld_base->op_actions[TGSI_OPCODE_SGE].intr_name = "llvm.AMDGPU.sge"; - bld_base->op_actions[TGSI_OPCODE_SEQ].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_SEQ].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_SEQ].intr_name = "llvm.AMDGPU.seq"; bld_base->op_actions[TGSI_OPCODE_SLE].fetch_args = radeon_llvm_fetch_args_2_reverse_soa; - bld_base->op_actions[TGSI_OPCODE_SLE].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_SLE].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_SLE].intr_name = "llvm.AMDGPU.sge"; bld_base->op_actions[TGSI_OPCODE_SLT].fetch_args = radeon_llvm_fetch_args_2_reverse_soa; - bld_base->op_actions[TGSI_OPCODE_SLT].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_SLT].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_SLT].intr_name = "llvm.AMDGPU.sgt"; - bld_base->op_actions[TGSI_OPCODE_SNE].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_SNE].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_SNE].intr_name = "llvm.AMDGPU.sne"; - bld_base->op_actions[TGSI_OPCODE_SGT].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_SGT].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_SGT].intr_name = "llvm.AMDGPU.sgt"; - bld_base->op_actions[TGSI_OPCODE_SIN].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.AMDGPU.sin"; bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args; bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex"; @@ -1045,10 +1092,10 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl"; bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex"; - 
bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc"; - bld_base->rsq_action.emit = lp_build_tgsi_intrinsic; + bld_base->rsq_action.emit = build_tgsi_intrinsic_nomem; bld_base->rsq_action.intr_name = "llvm.AMDGPU.rsq"; } -- 2.7.4