From b5f19db9766ac54d78b8087b0433011f908ebd2c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 28 Feb 2015 14:31:45 +0100 Subject: [PATCH] radeonsi: implement TGSI_OPCODE_BFI (v2) v2: Don't use the intrinsics, the shader backend can recognize these patterns and generates optimal code automatically. Reviewed-by: Tom Stellard --- docs/GL3.txt | 2 +- .../drivers/radeon/radeon_setup_tgsi_llvm.c | 34 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 267740a..b295149 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -102,7 +102,7 @@ GL 4.0, GLSL 4.00: - Dynamically uniform UBO array indices DONE (r600) - Implicit signed -> unsigned conversions DONE - Fused multiply-add DONE () - - Packing/bitfield/conversion functions DONE (r600) + - Packing/bitfield/conversion functions DONE (r600, radeonsi) - Enhanced textureGather DONE (r600, radeonsi) - Geometry shader instancing DONE (r600) - Geometry shader multiple streams DONE () diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 0034b56..d89e2b4 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -1234,6 +1234,39 @@ build_tgsi_intrinsic_nomem( build_tgsi_intrinsic(action, bld_base, emit_data, LLVMReadNoneAttribute); } +static void emit_bfi(const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef bfi_args[3]; + + // Calculate the bitmask: (((1 << src3) - 1) << src2) + bfi_args[0] = LLVMBuildShl(builder, + LLVMBuildSub(builder, + LLVMBuildShl(builder, + bld_base->int_bld.one, + emit_data->args[3], ""), + bld_base->int_bld.one, ""), + emit_data->args[2], ""); + + bfi_args[1] = LLVMBuildShl(builder, 
emit_data->args[1], + emit_data->args[2], ""); + + bfi_args[2] = emit_data->args[0]; + + /* Calculate (argN below means bfi_args[N], not emit_data->args[N]): + * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)) + * Use the right-hand side, which the LLVM backend can convert to V_BFI. + */ + emit_data->output[emit_data->chan] = + LLVMBuildXor(builder, bfi_args[2], + LLVMBuildAnd(builder, bfi_args[0], + LLVMBuildXor(builder, bfi_args[1], bfi_args[2], + ""), ""), ""); +} + /* this is ffs in C */ static void emit_lsb(const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, @@ -1381,6 +1414,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs"; bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and; bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl; + bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi; bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev"; -- 2.7.4