From d995115b1733ec14182e6bb4653b8f8389b87518 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 19 Dec 2016 16:11:27 +0100 Subject: [PATCH] gallium: remove TGSI_OPCODE_SUB MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit It's redundant with the source modifier. Reviewed-by: Nicolai Hähnle --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 20 ++++++------ src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 38 +++------------------- src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c | 6 ---- src/gallium/auxiliary/nir/tgsi_to_nir.c | 1 - src/gallium/auxiliary/tgsi/tgsi_aa_point.c | 20 ++++++------ src/gallium/auxiliary/tgsi/tgsi_exec.c | 4 --- src/gallium/auxiliary/tgsi/tgsi_info.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_lowering.c | 22 ++++++++----- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h | 1 - src/gallium/auxiliary/tgsi/tgsi_point_sprite.c | 12 +++---- src/gallium/auxiliary/tgsi/tgsi_transform.h | 8 +++-- src/gallium/auxiliary/tgsi/tgsi_util.c | 1 - src/gallium/auxiliary/util/u_pstipple.c | 2 +- src/gallium/auxiliary/vl/vl_bicubic_filter.c | 4 +-- src/gallium/auxiliary/vl/vl_compositor.c | 4 +-- src/gallium/auxiliary/vl/vl_deint_filter.c | 8 ++--- src/gallium/drivers/i915/i915_fpc_optimize.c | 1 - src/gallium/drivers/i915/i915_fpc_translate.c | 11 ------- src/gallium/drivers/ilo/shader/toy_tgsi.c | 6 ---- .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 -- src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c | 3 -- src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c | 3 -- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 1 - src/gallium/drivers/r600/r600_shader.c | 14 -------- src/gallium/drivers/svga/svga_tgsi_insn.c | 27 --------------- src/gallium/drivers/svga/svga_tgsi_vgpu10.c | 25 -------------- src/gallium/include/pipe/p_shader_tokens.h | 2 +- src/gallium/state_trackers/xa/xa_tgsi.c | 4 +-- src/mesa/state_tracker/st_atifs_to_tgsi.c | 18 +++++----- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 ++-- src/mesa/state_tracker/st_tgsi_lower_yuv.c | 3 +- 33 files changed, 82 insertions(+), 202 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index c236caa..57ca12e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -285,7 +285,7 @@ aa_transform_epilog(struct tgsi_transform_context *ctx) TGSI_FILE_OUTPUT, aactx->colorOutput, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, aactx->colorTemp, - TGSI_FILE_TEMPORARY, aactx->texTemp); + TGSI_FILE_TEMPORARY, aactx->texTemp, false); } } diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 33ef8ec..2b96b8a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -213,13 +213,13 @@ aa_transform_prolog(struct tgsi_transform_context *ctx) tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY, TGSI_FILE_INPUT, texInput, - TGSI_FILE_INPUT, texInput); + TGSI_FILE_INPUT, texInput, false); /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, - TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y); + TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, false); #if NORMALIZE /* OPTIONAL normalization of length */ /* RSQ t0.x, t0.x; */ @@ -237,7 +237,7 @@ aa_transform_prolog(struct tgsi_transform_context *ctx) tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SGT, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, - TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W); + TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W, false); /* KILL_IF -tmp0.yyyy; # if -tmp0.y < 0, KILL */ tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0, @@ -246,10 +246,10 @@ aa_transform_prolog(struct tgsi_transform_context *ctx) /* compute coverage factor = (1-d)/(1-k) */ /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Z, TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W, - TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z); + TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, true); /* RCP t0.z, t0.z; # t0.z = 1 / m */ newInst = tgsi_default_full_instruction(); @@ -265,22 +265,22 @@ aa_transform_prolog(struct tgsi_transform_context *ctx) ctx->emit_instruction(ctx, &newInst); /* SUB t0.y, 1, t0.x; # d = 1 - d */ - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y, TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W, - TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X); + TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, true); /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, - TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Z); + TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Z, false); /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SLE, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, - TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z); + TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, false); /* CMP t0.w, -t0.y, tex.w, t0.w; * # if -t0.y < 0 then @@ -318,7 +318,7 @@ aa_transform_epilog(struct tgsi_transform_context *ctx) TGSI_FILE_OUTPUT, aactx->colorOutput, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, aactx->colorTemp, - TGSI_FILE_TEMPORARY, aactx->tmp0); + TGSI_FILE_TEMPORARY, aactx->tmp0, false); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index 7d939e8..91e959f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -368,8 +368,8 @@ exp_emit( TGSI_OPCODE_EX2, floor_x); /* src0.x - floor( src0.x ) */ - emit_data->output[TGSI_CHAN_Y] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_SUB, emit_data->args[0] /* src0.x */, floor_x); + emit_data->output[TGSI_CHAN_Y] = + lp_build_sub(&bld_base->base, emit_data->args[0] /* src0.x */, floor_x); /* 2 ^ src0.x */ emit_data->output[TGSI_CHAN_Z] = lp_build_emit_llvm_unary(bld_base, @@ -394,8 +394,8 @@ frc_emit( LLVMValueRef tmp; tmp = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]); - emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_SUB, emit_data->args[0], tmp); + emit_data->output[emit_data->chan] = + lp_build_sub(&bld_base->base, emit_data->args[0], tmp); } /* TGSI_OPCODE_KILL_IF */ @@ -770,19 +770,6 @@ const struct lp_build_tgsi_action scs_action = { scs_emit /* emit */ }; -/* TGSI_OPCODE_SUB */ -static void -sub_emit( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - emit_data->output[emit_data->chan] = - LLVMBuildFSub(bld_base->base.gallivm->builder, - emit_data->args[0], - emit_data->args[1], ""); -} - /* TGSI_OPCODE_F2U */ static void f2u_emit( @@ -956,7 +943,7 @@ xpd_helper( tmp0 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, a, b); tmp1 = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MUL, c, d); - return lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB, tmp0, tmp1); + return lp_build_sub(&bld_base->base, tmp0, tmp1); } static void @@ -1352,7 +1339,6 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base) bld_base->op_actions[TGSI_OPCODE_MUL].emit = mul_emit; bld_base->op_actions[TGSI_OPCODE_DIV].emit = fdiv_emit; bld_base->op_actions[TGSI_OPCODE_RCP].emit = rcp_emit; - bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit; bld_base->op_actions[TGSI_OPCODE_UARL].emit = mov_emit; bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit; @@ -2071,19 +2057,6 @@ ssg_emit_cpu( emit_data->args[0]); } -/* TGSI_OPCODE_SUB (CPU Only) */ - -static void -sub_emit_cpu( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->base, - emit_data->args[0], - emit_data->args[1]); -} - /* TGSI_OPCODE_TRUNC (CPU Only) */ static void @@ -2624,7 +2597,6 @@ lp_set_default_actions_cpu( bld_base->op_actions[TGSI_OPCODE_SLT].emit = slt_emit_cpu; bld_base->op_actions[TGSI_OPCODE_SNE].emit = sne_emit_cpu; bld_base->op_actions[TGSI_OPCODE_SSG].emit = ssg_emit_cpu; - bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit_cpu; bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = trunc_emit_cpu; bld_base->rsq_action.emit = recip_sqrt_emit_cpu; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index a5e439f..6c177b0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -591,12 +591,6 @@ lp_emit_instruction_aos( dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); break; - case TGSI_OPCODE_SUB: - src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); - src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); - dst0 = lp_build_sub(&bld->bld_base.base, src0, src1); - break; - case TGSI_OPCODE_LRP: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index af4a6e0..f3e8700 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -1535,7 +1535,6 @@ static const nir_op op_trans[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_SLT] = nir_op_slt, [TGSI_OPCODE_SGE] = nir_op_sge, [TGSI_OPCODE_MAD] = nir_op_ffma, - [TGSI_OPCODE_SUB] = nir_op_fsub, [TGSI_OPCODE_LRP] = 0, [TGSI_OPCODE_SQRT] = nir_op_fsqrt, [TGSI_OPCODE_DP2A] = 0, diff --git a/src/gallium/auxiliary/tgsi/tgsi_aa_point.c b/src/gallium/auxiliary/tgsi/tgsi_aa_point.c index 9016eff..4b14a2f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_aa_point.c +++ b/src/gallium/auxiliary/tgsi/tgsi_aa_point.c @@ -148,16 +148,16 @@ aa_prolog(struct tgsi_transform_context *ctx) tmp0 = ts->tmp; /* SUB t0.xy, texIn, (0.5, 0,5) */ - tgsi_transform_op2_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY, TGSI_FILE_INPUT, texIn, - TGSI_FILE_IMMEDIATE, imm); + TGSI_FILE_IMMEDIATE, imm, true); /* DP2 t0.x, t0.xy, t0.xy; # t0.x = x^2 + y^2 */ tgsi_transform_op2_inst(ctx, TGSI_OPCODE_DP2, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, tmp0, - TGSI_FILE_TEMPORARY, tmp0); + TGSI_FILE_TEMPORARY, tmp0, false); /* SQRT t0.x, t0.x */ tgsi_transform_op1_inst(ctx, TGSI_OPCODE_SQRT, @@ -167,22 +167,22 @@ aa_prolog(struct tgsi_transform_context *ctx) /* compute coverage factor = (0.5-d)/(0.5-k) */ /* SUB t0.w, 0.5, texIn.z; # t0.w = 0.5-k */ - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W, TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X, - TGSI_FILE_INPUT, texIn, TGSI_SWIZZLE_Z); + TGSI_FILE_INPUT, texIn, TGSI_SWIZZLE_Z, true); /* SUB t0.y, 0.5, t0.x; # t0.y = 0.5-d */ - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y, TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X, - TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X); + TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, true); /* DIV t0.w, t0.y, t0.w; # coverage = (0.5-d)/(0.5-k) */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, - TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W); + TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W, false); /* If the coverage value is negative, it means the fragment is outside * the point's circular boundary. Kill it. @@ -198,7 +198,7 @@ aa_prolog(struct tgsi_transform_context *ctx) tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN, TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W, - TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_W); + TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_W, false); } /** @@ -249,7 +249,7 @@ aa_epilog(struct tgsi_transform_context *ctx) TGSI_FILE_OUTPUT, ts->color_out, TGSI_WRITEMASK_W, TGSI_FILE_TEMPORARY, ts->color_tmp, - TGSI_FILE_TEMPORARY, ts->tmp); + TGSI_FILE_TEMPORARY, ts->tmp, false); } /** diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 2f89de6..915cd10 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -5208,10 +5208,6 @@ exec_instruction( exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_SUB: - exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); - break; - case TGSI_OPCODE_LRP: exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 9b2431f..a339ec2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -54,7 +54,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT }, { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE }, { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD }, - { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB }, + { 1, 2, 0, 0, 0, 0, 0, COMP, "", 17 }, /* removed */ { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP }, { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA }, { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c index b0a28f2..bf6cbb3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c @@ -468,12 +468,13 @@ transform_frc(struct tgsi_transform_context *tctx, /* SUB dst, src, tmpA */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } } @@ -689,12 +690,13 @@ transform_exp(struct tgsi_transform_context *tctx, /* SUB tmpA.x, src.x, tmpA.x */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } else { /* FLR tmpA.x, src.x */ @@ -722,12 +724,13 @@ transform_exp(struct tgsi_transform_context *tctx, if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { /* SUB dst.y, src.x, tmpA.x */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } @@ -822,12 +825,13 @@ transform_log(struct tgsi_transform_context *tctx, /* SUB tmpA.y, tmpA.x, tmpA.y */ new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } else { /* FLR tmpA.y, tmpA.x */ @@ -1072,15 +1076,14 @@ transform_flr_ceil(struct tgsi_transform_context *tctx, /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ new_inst = tgsi_default_full_instruction(); - if (opcode == TGSI_OPCODE_CEIL) - new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; - else - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 2; reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + if (opcode == TGSI_OPCODE_FLR) + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } } @@ -1124,7 +1127,7 @@ transform_trunc(struct tgsi_transform_context *tctx, tctx->emit_instruction(tctx, &new_inst); new_inst = tgsi_default_full_instruction(); - new_inst.Instruction.Opcode = TGSI_OPCODE_SUB; + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; new_inst.Instruction.NumDstRegs = 1; reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); new_inst.Instruction.NumSrcRegs = 2; @@ -1132,6 +1135,7 @@ transform_trunc(struct tgsi_transform_context *tctx, new_inst.Src[0].Register.Absolute = true; new_inst.Src[0].Register.Negate = false; reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); + new_inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &new_inst); } else { new_inst = tgsi_default_full_instruction(); diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index d78dd66..13c443f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -73,7 +73,6 @@ OP12(MAX) OP12(SLT) OP12(SGE) OP13(MAD) -OP12(SUB) OP13(LRP) OP11(SQRT) OP13(DP2A) diff --git a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c index 713bd60..f60a17c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c +++ b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c @@ -295,7 +295,7 @@ psprite_emit_vertex_inst(struct tgsi_transform_context *ctx, tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, - TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W); + TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W, false); /* MUL point_scale.xy, point_scale.xx, inverseViewport.xy */ inst = tgsi_default_full_instruction(); @@ -323,15 +323,15 @@ psprite_emit_vertex_inst(struct tgsi_transform_context *ctx, TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y, TGSI_FILE_TEMPORARY, ts->point_size_tmp, - TGSI_SWIZZLE_X); + TGSI_SWIZZLE_X, false); - tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB, + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, ts->point_coord_k, TGSI_WRITEMASK_X, TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Z, TGSI_FILE_TEMPORARY, ts->point_coord_k, - TGSI_SWIZZLE_X); + TGSI_SWIZZLE_X, true); } @@ -442,13 +442,13 @@ psprite_inst(struct tgsi_transform_context *ctx, tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MAX, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, - TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y); + TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y, false); /* MIN point_size_tmp.x, point_size_tmp.x, point_ivp.w */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X, TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X, - TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W); + TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W, false); } else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT && inst->Dst[0].Register.Index == ts->point_pos_out) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h index c21ff95..7ea8206 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.h +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -281,7 +281,8 @@ tgsi_transform_op2_inst(struct tgsi_transform_context *ctx, unsigned src0_file, unsigned src0_index, unsigned src1_file, - unsigned src1_index) + unsigned src1_index, + bool src1_negate) { struct tgsi_full_instruction inst; @@ -296,6 +297,7 @@ tgsi_transform_op2_inst(struct tgsi_transform_context *ctx, inst.Src[0].Register.Index = src0_index; inst.Src[1].Register.File = src1_file; inst.Src[1].Register.Index = src1_index; + inst.Src[1].Register.Negate = src1_negate; ctx->emit_instruction(ctx, &inst); } @@ -388,7 +390,8 @@ tgsi_transform_op2_swz_inst(struct tgsi_transform_context *ctx, unsigned src0_swizzle, unsigned src1_file, unsigned src1_index, - unsigned src1_swizzle) + unsigned src1_swizzle, + bool src1_negate) { struct tgsi_full_instruction inst; @@ -403,6 +406,7 @@ tgsi_transform_op2_swz_inst(struct tgsi_transform_context *ctx, inst.Src[0].Register.Index = src0_index; inst.Src[1].Register.File = src1_file; inst.Src[1].Register.Index = src1_index; + inst.Src[1].Register.Negate = src1_negate; switch (dst_writemask) { case TGSI_WRITEMASK_X: inst.Src[0].Register.SwizzleX = src0_swizzle; diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 4f02829..4a6a2ae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -192,7 +192,6 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_OPCODE_SLT: case TGSI_OPCODE_SGE: case TGSI_OPCODE_MAD: - case TGSI_OPCODE_SUB: case TGSI_OPCODE_LRP: case TGSI_OPCODE_FMA: case TGSI_OPCODE_FRC: diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c index f6ea535..ae4cfa1 100644 --- a/src/gallium/auxiliary/util/u_pstipple.c +++ b/src/gallium/auxiliary/util/u_pstipple.c @@ -344,7 +344,7 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx) TGSI_FILE_TEMPORARY, texTemp, TGSI_WRITEMASK_XYZW, pctx->wincoordFile, wincoordInput, - TGSI_FILE_IMMEDIATE, pctx->numImmed); + TGSI_FILE_IMMEDIATE, pctx->numImmed, false); /* TEX texTemp, texTemp, sampler, 2D; */ tgsi_transform_tex_inst(ctx, diff --git a/src/gallium/auxiliary/vl/vl_bicubic_filter.c b/src/gallium/auxiliary/vl/vl_bicubic_filter.c index 0364d43..774702c 100644 --- a/src/gallium/auxiliary/vl/vl_bicubic_filter.c +++ b/src/gallium/auxiliary/vl/vl_bicubic_filter.c @@ -186,8 +186,8 @@ create_frag_shader(struct vl_bicubic_filter *filter, unsigned video_width, * t = frac(temp) * vtex = floor(i_vtex)/i_size */ - ureg_SUB(shader, ureg_writemask(t_array[21], TGSI_WRITEMASK_XY), - i_vtex, half_pixel); + ureg_ADD(shader, ureg_writemask(t_array[21], TGSI_WRITEMASK_XY), + i_vtex, ureg_negate(half_pixel)); ureg_MUL(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY), ureg_src(t_array[21]), ureg_imm2f(shader, video_width, video_height)); ureg_FRC(shader, ureg_writemask(t, TGSI_WRITEMASK_XY), diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c index 297c3ab..1efcb75 100644 --- a/src/gallium/auxiliary/vl/vl_compositor.c +++ b/src/gallium/auxiliary/vl/vl_compositor.c @@ -151,8 +151,8 @@ create_frag_shader_weave(struct ureg_program *shader, struct ureg_dst fragment) */ for (i = 0; i < 2; ++i) { ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_X), i_tc[i]); - ureg_SUB(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), - i_tc[i], ureg_imm1f(shader, 0.5f)); + ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), + i_tc[i], ureg_imm1f(shader, -0.5f)); ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), ureg_src(t_tc[i])); ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_W), ureg_imm1f(shader, i ? 1.0f : 0.0f)); diff --git a/src/gallium/auxiliary/vl/vl_deint_filter.c b/src/gallium/auxiliary/vl/vl_deint_filter.c index 3ca3b49..2eec5cb 100644 --- a/src/gallium/auxiliary/vl/vl_deint_filter.c +++ b/src/gallium/auxiliary/vl/vl_deint_filter.c @@ -173,21 +173,21 @@ create_deint_frag_shader(struct vl_deint_filter *filter, unsigned field, // cur vs prev2 ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_cur); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prevprev); - ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_src(t_b)); + ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_negate(ureg_src(t_b))); // prev vs next ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prev); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_next); - ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_src(t_b)); + ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_negate(ureg_src(t_b))); } else { /* interpolating bottom field -> current field is a top field */ // cur vs prev2 ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_cur); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prevprev); - ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_src(t_b)); + ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_negate(ureg_src(t_b))); // prev vs next ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prev); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_next); - ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_src(t_b)); + ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_negate(ureg_src(t_b))); } // absolute maximum of differences diff --git a/src/gallium/drivers/i915/i915_fpc_optimize.c b/src/gallium/drivers/i915/i915_fpc_optimize.c index 7c3b9a9..5f2a876 100644 --- a/src/gallium/drivers/i915/i915_fpc_optimize.c +++ b/src/gallium/drivers/i915/i915_fpc_optimize.c @@ -115,7 +115,6 @@ static const struct { [ TGSI_OPCODE_SLT ] = { false, false, 0, 1, 2 }, [ TGSI_OPCODE_SNE ] = { false, false, 0, 1, 2 }, [ TGSI_OPCODE_SSG ] = { false, false, 0, 1, 1 }, - [ TGSI_OPCODE_SUB ] = { false, false, 0, 1, 2 }, [ TGSI_OPCODE_TEX ] = { true, false, 0, 1, 2 }, [ TGSI_OPCODE_TRUNC ] = { false, false, 0, 1, 1 }, [ TGSI_OPCODE_TXB ] = { true, false, 0, 1, 2 }, diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 80caf31..241c92d 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -1022,17 +1022,6 @@ i915_translate_instruction(struct i915_fp_compile *p, negate(tmp, 1, 1, 1, 1), 0); break; - case TGSI_OPCODE_SUB: - src0 = src_vector(p, &inst->Src[0], fs); - src1 = src_vector(p, &inst->Src[1], fs); - - i915_emit_arith(p, - A0_ADD, - get_result_vector(p, &inst->Dst[0]), - get_result_flags(inst), 0, - src0, negate(src1, 1, 1, 1, 1), 0); - break; - case TGSI_OPCODE_TEX: emit_tex(p, inst, T0_TEXLD, fs); break; diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c index a88f189..4d813f0 100644 --- a/src/gallium/drivers/ilo/shader/toy_tgsi.c +++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c @@ -50,7 +50,6 @@ static const struct { [TGSI_OPCODE_MAX] = { GEN6_OPCODE_SEL, 1, 2 }, /* a later pass will move src[2] to accumulator */ [TGSI_OPCODE_MAD] = { GEN6_OPCODE_MAC, 1, 3 }, - [TGSI_OPCODE_SUB] = { GEN6_OPCODE_ADD, 1, 2 }, [TGSI_OPCODE_SQRT] = { TOY_OPCODE_SQRT, 1, 1 }, [TGSI_OPCODE_FRC] = { GEN6_OPCODE_FRC, 1, 1 }, [TGSI_OPCODE_FLR] = { GEN6_OPCODE_RNDD, 1, 1 }, @@ -144,9 +143,6 @@ aos_simple(struct toy_compiler *tc, case TGSI_OPCODE_UMAX: cond_modifier = GEN6_COND_GE; break; - case TGSI_OPCODE_SUB: - src[1] = tsrc_negate(src[1]); - break; case TGSI_OPCODE_IABS: src[0] = tsrc_absolute(src[0]); break; @@ -776,7 +772,6 @@ static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_SLT] = aos_set_on_cond, [TGSI_OPCODE_SGE] = aos_set_on_cond, [TGSI_OPCODE_MAD] = aos_simple, - [TGSI_OPCODE_SUB] = aos_simple, [TGSI_OPCODE_LRP] = aos_LRP, [TGSI_OPCODE_SQRT] = aos_simple, [TGSI_OPCODE_DP2A] = aos_DP2A, @@ -1318,7 +1313,6 @@ static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_SLT] = soa_per_channel, [TGSI_OPCODE_SGE] = soa_per_channel, [TGSI_OPCODE_MAD] = soa_per_channel, - [TGSI_OPCODE_SUB] = soa_per_channel, [TGSI_OPCODE_LRP] = soa_per_channel, [TGSI_OPCODE_SQRT] = soa_scalar_replicate, [TGSI_OPCODE_DP2A] = soa_dot_product, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index b919098..86348e7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -723,7 +723,6 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(SGE, SET); NV50_IR_OPCODE_CASE(MAD, MAD); NV50_IR_OPCODE_CASE(FMA, FMA); - NV50_IR_OPCODE_CASE(SUB, SUB); NV50_IR_OPCODE_CASE(FLR, FLOOR); NV50_IR_OPCODE_CASE(ROUND, CVT); @@ -2988,7 +2987,6 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) case TGSI_OPCODE_SHL: case TGSI_OPCODE_ISHR: case TGSI_OPCODE_USHR: - case TGSI_OPCODE_SUB: case TGSI_OPCODE_XOR: FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { src0 = fetchSrc(0, c); diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c index d031c68..4924d21 100644 --- a/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c +++ b/src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c @@ -750,9 +750,6 @@ nvfx_fragprog_parse_instruction(struct nvfx_fpc *fpc, } break; } - case TGSI_OPCODE_SUB: - nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], neg(src[1]), none)); - break; case TGSI_OPCODE_TEX: nvfx_fp_emit(fpc, tex(sat, TEX, unit, dst, mask, src[0], none, none)); break; diff --git a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c index a802c43..baea701 100644 --- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c +++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c @@ -672,9 +672,6 @@ nvfx_vertprog_parse_instruction(struct nvfx_vpc *vpc, case TGSI_OPCODE_SSG: nvfx_vp_emit(vpc, arith(sat, VEC, SSG, dst, mask, src[0], none, none)); break; - case TGSI_OPCODE_SUB: - nvfx_vp_emit(vpc, arith(sat, VEC, ADD, dst, mask, src[0], none, neg(src[1]))); - break; case TGSI_OPCODE_TRUNC: tmp = nvfx_src(temp(vpc)); insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 9d1e59f..59dfa05 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -50,7 +50,6 @@ static unsigned translate_opcode(unsigned opcode) case TGSI_OPCODE_SLT: return RC_OPCODE_SLT; case TGSI_OPCODE_SGE: return RC_OPCODE_SGE; case TGSI_OPCODE_MAD: return RC_OPCODE_MAD; - case TGSI_OPCODE_SUB: return RC_OPCODE_SUB; case TGSI_OPCODE_LRP: return RC_OPCODE_LRP; /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c2996aa..ebe2744 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -3804,9 +3804,6 @@ static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool /* handle some special cases */ if (i == 1 || i == 3) { switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) { - case TGSI_OPCODE_SUB: - r600_bytecode_src_toggle_neg(&alu.src[1]); - break; case TGSI_OPCODE_DABS: r600_bytecode_src_set_abs(&alu.src[0]); break; @@ -3931,14 +3928,6 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) r600_bytecode_src(&alu.src[0], &ctx->src[1], i); r600_bytecode_src(&alu.src[1], &ctx->src[0], i); } - /* handle some special cases */ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_SUB: - r600_bytecode_src_toggle_neg(&alu.src[1]); - break; - default: - break; - } if (i == lasti || trans_only) { alu.last = 1; } @@ -9006,7 +8995,6 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, - [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, @@ -9205,7 +9193,6 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, - [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, @@ -9428,7 +9415,6 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, - [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 47a0afc..0efd72d 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -1403,30 +1403,6 @@ emit_ssg(struct svga_shader_emitter *emit, /** - * Translate/emit TGSI SUB instruction as: - * ADD DST, SRC0, negate(SRC1) - */ -static boolean -emit_sub(struct svga_shader_emitter *emit, - const struct tgsi_full_instruction *insn) -{ - SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); - struct src_register src0 = translate_src_register( - emit, &insn->Src[0] ); - struct src_register src1 = translate_src_register( - emit, &insn->Src[1] ); - - src1 = negate(src1); - - if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, - src0, src1 )) - return FALSE; - - return TRUE; -} - - -/** * Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative). */ static boolean @@ -2989,9 +2965,6 @@ svga_emit_instruction(struct svga_shader_emitter *emit, case TGSI_OPCODE_SLE: return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn ); - case TGSI_OPCODE_SUB: - return emit_sub( emit, insn ); - case TGSI_OPCODE_POW: return emit_pow( emit, insn ); diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index e7cfb40..3131444 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -4715,29 +4715,6 @@ emit_issg(struct svga_shader_emitter_v10 *emit, /** - * Emit code for TGSI_OPCODE_SUB instruction. - */ -static boolean -emit_sub(struct svga_shader_emitter_v10 *emit, - const struct tgsi_full_instruction *inst) -{ - /* dst = SUB(s0, s1): - * dst = s0 - s1 - * Translates into: - * ADD dst, s0, neg(s1) - */ - struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]); - - /* ADD dst, s0, neg(s1) */ - emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], - &inst->Src[0], &neg_src1, - inst->Instruction.Saturate); - - return TRUE; -} - - -/** * Emit a comparison instruction. The dest register will get * 0 or ~0 values depending on the outcome of comparing src0 to src1. */ @@ -5801,8 +5778,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, return emit_ssg(emit, inst); case TGSI_OPCODE_ISSG: return emit_issg(emit, inst); - case TGSI_OPCODE_SUB: - return emit_sub(emit, inst); case TGSI_OPCODE_TEX: return emit_tex(emit, inst); case TGSI_OPCODE_TXP: diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 3538090..3384035 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -345,7 +345,7 @@ struct tgsi_property_data { #define TGSI_OPCODE_SLT 14 #define TGSI_OPCODE_SGE 15 #define TGSI_OPCODE_MAD 16 -#define TGSI_OPCODE_SUB 17 +/* gap */ #define TGSI_OPCODE_LRP 18 #define TGSI_OPCODE_FMA 19 #define TGSI_OPCODE_SQRT 20 diff --git a/src/gallium/state_trackers/xa/xa_tgsi.c b/src/gallium/state_trackers/xa/xa_tgsi.c index f3f665d..344a576 100644 --- a/src/gallium/state_trackers/xa/xa_tgsi.c +++ b/src/gallium/state_trackers/xa/xa_tgsi.c @@ -239,10 +239,10 @@ radial_gradient(struct ureg_program *ureg, ureg_MUL(ureg, temp0, ureg_scalar(const0124, TGSI_SWIZZLE_W), ureg_src(temp2)); ureg_MUL(ureg, temp3, ureg_src(temp1), ureg_src(temp1)); - ureg_SUB(ureg, temp2, ureg_src(temp3), ureg_src(temp0)); + ureg_ADD(ureg, temp2, ureg_src(temp3), ureg_negate(ureg_src(temp0))); ureg_RSQ(ureg, temp2, ureg_abs(ureg_src(temp2))); ureg_RCP(ureg, temp2, ureg_src(temp2)); - ureg_SUB(ureg, temp1, ureg_src(temp2), ureg_src(temp1)); + ureg_ADD(ureg, temp1, ureg_src(temp2), ureg_negate(ureg_src(temp1))); ureg_ADD(ureg, temp0, ureg_scalar(coords, TGSI_SWIZZLE_Z), ureg_scalar(coords, TGSI_SWIZZLE_Z)); diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.c b/src/mesa/state_tracker/st_atifs_to_tgsi.c index 3aa7f84..b28c55c 100644 --- a/src/mesa/state_tracker/st_atifs_to_tgsi.c +++ b/src/mesa/state_tracker/st_atifs_to_tgsi.c @@ -66,7 +66,7 @@ static const struct instruction_desc inst_desc[] = { {TGSI_OPCODE_NOP, "UND", 0}, /* unused */ {TGSI_OPCODE_ADD, "ADD", 2}, {TGSI_OPCODE_MUL, "MUL", 2}, - {TGSI_OPCODE_SUB, "SUB", 2}, + {TGSI_OPCODE_NOP, "SUB", 2}, {TGSI_OPCODE_DP3, "DOT3", 2}, {TGSI_OPCODE_DP4, "DOT4", 2}, {TGSI_OPCODE_MAD, "MAD", 3}, @@ -175,16 +175,16 @@ prepare_argument(struct st_translate *t, const unsigned argId, if (srcReg->argMod & GL_COMP_BIT_ATI) { struct ureg_src modsrc[2]; modsrc[0] = ureg_imm1f(t->ureg, 1.0f); - modsrc[1] = ureg_src(arg); + modsrc[1] = ureg_negate(ureg_src(arg)); - ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2); + ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2); } if (srcReg->argMod & GL_BIAS_BIT_ATI) { struct ureg_src modsrc[2]; modsrc[0] = ureg_src(arg); - modsrc[1] = ureg_imm1f(t->ureg, 0.5f); + modsrc[1] = ureg_imm1f(t->ureg, -0.5f); - ureg_insn(t->ureg, TGSI_OPCODE_SUB, &arg, 1, modsrc, 2); + ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2); } if (srcReg->argMod & GL_2X_BIT_ATI) { struct ureg_src modsrc[2]; @@ -211,11 +211,13 @@ emit_special_inst(struct st_translate *t, const struct instruction_desc *desc, struct ureg_dst tmp[1]; struct ureg_src src[3]; - if (!strcmp(desc->name, "CND")) { + if (!strcmp(desc->name, "SUB")) { + ureg_ADD(t->ureg, *dst, args[0], ureg_negate(args[1])); + } else if (!strcmp(desc->name, "CND")) { tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 2); /* re-purpose a3 */ src[0] = ureg_imm1f(t->ureg, 0.5f); - src[1] = args[2]; - ureg_insn(t->ureg, TGSI_OPCODE_SUB, tmp, 1, src, 2); + src[1] = ureg_negate(args[2]); + ureg_insn(t->ureg, TGSI_OPCODE_ADD, tmp, 1, src, 2); src[0] = ureg_src(tmp[0]); src[1] = args[0]; src[2] = args[1]; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3daf60a..df7a1bc 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1702,7 +1702,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_sub: - emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]); + op[1].negate = ~op[1].negate; + emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; case ir_binop_mul: diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 1768356..f906fed 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -481,8 +481,6 @@ translate_opcode( unsigned op ) return TGSI_OPCODE_SIN; case OPCODE_SLT: return TGSI_OPCODE_SLT; - case OPCODE_SUB: - return TGSI_OPCODE_SUB; case OPCODE_TEX: return TGSI_OPCODE_TEX; case OPCODE_TXB: @@ -566,6 +564,10 @@ compile_instruction( ureg_MOV(ureg, dst[0], ureg_abs(src[0])); break; + case OPCODE_SUB: + ureg_ADD(ureg, dst[0], src[0], ureg_negate(src[1])); + break; + default: ureg_insn( ureg, translate_opcode( inst->Opcode ), diff --git a/src/mesa/state_tracker/st_tgsi_lower_yuv.c b/src/mesa/state_tracker/st_tgsi_lower_yuv.c index e346b97..6acd173 100644 --- a/src/mesa/state_tracker/st_tgsi_lower_yuv.c +++ b/src/mesa/state_tracker/st_tgsi_lower_yuv.c @@ -258,13 +258,14 @@ yuv_to_rgb(struct tgsi_transform_context *tctx, /* SUB tmpA.xyz, tmpA, imm[3] */ inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.Opcode = TGSI_OPCODE_ADD; inst.Instruction.Saturate = 0; inst.Instruction.NumDstRegs = 1; inst.Instruction.NumSrcRegs = 2; reg_dst(&inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ); reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _)); reg_src(&inst.Src[1], &ctx->imm[3], SWIZ(X, Y, Z, _)); + inst.Src[1].Register.Negate = 1; tctx->emit_instruction(tctx, &inst); /* DP3 dst.x, tmpA, imm[0] */ -- 2.7.4