From da1d8adc2934efb29ee579f43dbc09bb0cfd1e2c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 3 Aug 2018 20:22:34 -0400 Subject: [PATCH] radeonsi: remove fetch_args callbacks for ALU instructions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Tested-by: Dieter Nützel Reviewed-by: Samuel Pitoiset --- src/gallium/drivers/radeonsi/si_shader.c | 73 ++++++++----------- src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 85 +++++++---------------- 2 files changed, 55 insertions(+), 103 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 30e150e..3582e53 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4002,12 +4002,24 @@ static LLVMValueRef si_llvm_emit_ddxy_interp( return ac_build_gather_values(&ctx->ac, result, 4); } -static void interp_fetch_args( - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) +static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); + struct si_shader *shader = ctx->shader; + const struct tgsi_shader_info *info = &shader->selector->info; + LLVMValueRef interp_param; const struct tgsi_full_instruction *inst = emit_data->inst; + const struct tgsi_full_src_register *input = &inst->Src[0]; + int input_base, input_array_size; + int chan; + int i; + LLVMValueRef prim_mask = ctx->abi.prim_mask; + LLVMValueRef array_idx; + int interp_param_idx; + unsigned interp; + unsigned location; if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { /* offset is in second src, first two channels */ @@ -4068,26 +4080,6 @@ static void interp_fetch_args( emit_data->args[1] = LLVMBuildFSub(ctx->ac.builder, emit_data->args[1], halfval, ""); emit_data->arg_count = 2; } -} - -static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - struct si_shader *shader = ctx->shader; - const struct tgsi_shader_info *info = &shader->selector->info; - LLVMValueRef interp_param; - const struct tgsi_full_instruction *inst = emit_data->inst; - const struct tgsi_full_src_register *input = &inst->Src[0]; - int input_base, input_array_size; - int chan; - int i; - LLVMValueRef prim_mask = ctx->abi.prim_mask; - LLVMValueRef array_idx; - int interp_param_idx; - unsigned interp; - unsigned location; assert(input->Register.File == TGSI_FILE_INPUT); @@ -4244,19 +4236,6 @@ static void ballot_emit( emit_data->output[1] = LLVMBuildExtractElement(builder, tmp, ctx->i32_1, ""); } -static void read_invoc_fetch_args( - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, - 0, emit_data->src_chan); - - /* Always read the source invocation (= lane) from the X channel. */ - emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, - 1, TGSI_CHAN_X); - emit_data->arg_count = 2; -} - static void read_lane_emit( const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, @@ -4264,6 +4243,16 @@ static void read_lane_emit( { struct si_shader_context *ctx = si_shader_context(bld_base); + if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_READ_INVOC) { + emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, + 0, emit_data->src_chan); + + /* Always read the source invocation (= lane) from the X channel. */ + emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, + 1, TGSI_CHAN_X); + emit_data->arg_count = 2; + } + /* We currently have no other way to prevent LLVM from lifting the icmp * calls to a dominating basic block. */ @@ -4426,11 +4415,6 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); } -static const struct lp_build_tgsi_action interp_action = { - .fetch_args = interp_fetch_args, - .emit = build_interp_intrinsic, -}; - static void si_create_function(struct si_shader_context *ctx, const char *name, LLVMTypeRef *returns, unsigned num_returns, @@ -5989,9 +5973,9 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, bld_base = &ctx->bld_base; bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; - bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action; - bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action; - bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action; + bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID].emit = build_interp_intrinsic; + bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE].emit = build_interp_intrinsic; + bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET].emit = build_interp_intrinsic; bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit; @@ -6009,7 +5993,6 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = "llvm.amdgcn.readfirstlane"; bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit; bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = "llvm.amdgcn.readlane"; - bld_base->op_actions[TGSI_OPCODE_READ_INVOC].fetch_args = read_invoc_fetch_args; bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit; bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_tgsi_emit_vertex; diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c index 4c90874..028ca8b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c @@ -25,31 +25,6 @@ #include "si_shader_internal.h" #include "ac_llvm_util.h" -static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - const struct tgsi_full_instruction *inst = emit_data->inst; - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - unsigned i; - LLVMValueRef conds[TGSI_NUM_CHANNELS]; - - for (i = 0; i < TGSI_NUM_CHANNELS; i++) { - LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i); - conds[i] = LLVMBuildFCmp(builder, LLVMRealOGE, value, - ctx->ac.f32_0, ""); - } - - /* And the conditions together */ - for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) { - conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], ""); - } - - emit_data->dst_type = ctx->voidt; - emit_data->arg_count = 1; - emit_data->args[0] = conds[0]; -} - void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); @@ -77,7 +52,23 @@ static void kil_emit(const struct lp_build_tgsi_action *action, LLVMValueRef visible; if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) { - visible = emit_data->args[0]; + const struct tgsi_full_instruction *inst = emit_data->inst; + struct si_shader_context *ctx = si_shader_context(bld_base); + LLVMBuilderRef builder = ctx->ac.builder; + unsigned i; + LLVMValueRef conds[TGSI_NUM_CHANNELS]; + + for (i = 0; i < TGSI_NUM_CHANNELS; i++) { + LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i); + conds[i] = LLVMBuildFCmp(builder, LLVMRealOGE, value, + ctx->ac.f32_0, ""); + } + + /* And the conditions together */ + for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) { + conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], ""); + } + visible = conds[0]; } else { assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL); visible = LLVMConstInt(ctx->i1, false, 0); @@ -596,20 +587,16 @@ static void emit_minmax_int(const struct lp_build_tgsi_action *action, emit_data->args[1], ""); } -static void pk2h_fetch_args(struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, - 0, TGSI_CHAN_X); - emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, - 0, TGSI_CHAN_Y); -} - static void emit_pk2h(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); + LLVMValueRef v[] = { + lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X), + lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y), + }; + /* From the GLSL 4.50 spec: * "The rounding mode cannot be set and is undefined." @@ -617,18 +604,10 @@ static void emit_pk2h(const struct lp_build_tgsi_action *action, * v_cvt_pkrtz_f16 rounds to zero, but it's fastest. */ emit_data->output[emit_data->chan] = - LLVMBuildBitCast(ctx->ac.builder, - ac_build_cvt_pkrtz_f16(&ctx->ac, emit_data->args), + LLVMBuildBitCast(ctx->ac.builder, ac_build_cvt_pkrtz_f16(&ctx->ac, v), ctx->i32, ""); } -static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, - 0, TGSI_CHAN_X); -} - static void emit_up2h(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) @@ -640,7 +619,7 @@ static void emit_up2h(const struct lp_build_tgsi_action *action, i16 = LLVMInt16TypeInContext(ctx->ac.context); const16 = LLVMConstInt(ctx->i32, 16, 0); - input = emit_data->args[0]; + input = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); for (i = 0; i < 2; i++) { val = i == 1 ? LLVMBuildLShr(ctx->ac.builder, input, const16, "") : input; @@ -676,25 +655,19 @@ static void emit_rsq(const struct lp_build_tgsi_action *action, ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, sqrt); } -static void dfracexp_fetch_args(struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); - emit_data->arg_count = 1; -} - static void dfracexp_emit(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); + LLVMValueRef in = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); emit_data->output[emit_data->chan] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.mant.f64", - ctx->ac.f64, &emit_data->args[0], 1, 0); + ctx->ac.f64, &in, 1, 0); emit_data->output1[emit_data->chan] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.exp.i32.f64", - ctx->ac.i32, &emit_data->args[0], 1, 0); + ctx->ac.i32, &in, 1, 0); } void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) @@ -735,7 +708,6 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64"; bld_base->op_actions[TGSI_OPCODE_DTRUNC].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_DTRUNC].intr_name = "llvm.trunc.f64"; - bld_base->op_actions[TGSI_OPCODE_DFRACEXP].fetch_args = dfracexp_fetch_args; bld_base->op_actions[TGSI_OPCODE_DFRACEXP].emit = dfracexp_emit; bld_base->op_actions[TGSI_OPCODE_DLDEXP].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_DLDEXP].intr_name = "llvm.amdgcn.ldexp.f64"; @@ -764,7 +736,6 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp; bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg; bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f; - bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args; bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit; bld_base->op_actions[TGSI_OPCODE_KILL].emit = kil_emit; bld_base->op_actions[TGSI_OPCODE_LDEXP].emit = build_tgsi_intrinsic_nomem; @@ -780,7 +751,6 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb; bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not; bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or; - bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args; bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h; bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32"; @@ -817,7 +787,6 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base) bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f; bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor; bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp; - bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args; bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h; bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int; -- 2.7.4