From d2a3de19c6aa5881228734c73df706483a4aecf9 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Wed, 14 May 2014 23:22:32 -0400 Subject: [PATCH] nv50/ir: fix constant folding for OP_MUL subop HIGH These instructions can come in either through IMUL_HI/UMUL_HI TGSI opcodes, or from OP_DIV constant folding. Also make sure that the constant foldings which delete the original instruction still get counted as having done something. Signed-off-by: Ilia Mirkin Cc: "10.1 10.2" Reviewed-by: Ben Skeggs --- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 47 ++++++++++++++++++++-- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 3005922..e6f8f69 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -425,7 +425,17 @@ ConstantFolding::expr(Instruction *i, case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break; case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break; case TYPE_S32: - case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break; + if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { + res.data.s32 = ((int64_t)a->data.s32 * b->data.s32) >> 32; + break; + } + /* fallthrough */ + case TYPE_U32: + if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { + res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32) >> 32; + break; + } + res.data.u32 = a->data.u32 * b->data.u32; break; default: return; } @@ -691,12 +701,41 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) { const int t = !s; const operation op = i->op; + Instruction *newi = i; switch (i->op) { case OP_MUL: if (i->dType == TYPE_F32) tryCollapseChainedMULs(i, s, imm0); + if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { + assert(!isFloatType(i->sType)); + if (imm0.isInteger(1) && i->dType == TYPE_S32) { + bld.setPosition(i, false); + // Need to set to the sign value, which is a compare. + newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0), + TYPE_S32, i->getSrc(t), bld.mkImm(0)); + delete_Instruction(prog, i); + } else if (imm0.isInteger(0) || imm0.isInteger(1)) { + // The high bits can't be set in this case (either mul by 0 or + // unsigned by 1) + i->op = OP_MOV; + i->subOp = 0; + i->setSrc(0, new_ImmediateValue(prog, 0u)); + i->src(0).mod = Modifier(0); + i->setSrc(1, NULL); + } else if (!imm0.isNegative() && imm0.isPow2()) { + // Translate into a shift + imm0.applyLog2(); + i->op = OP_SHR; + i->subOp = 0; + imm0.reg.data.u32 = 32 - imm0.reg.data.u32; + i->setSrc(0, i->getSrc(t)); + i->src(0).mod = i->src(t).mod; + i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32)); + i->src(1).mod = 0; + } + } else if (imm0.isInteger(0)) { i->op = OP_MOV; i->setSrc(0, new_ImmediateValue(prog, 0u)); @@ -787,7 +826,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) else tA = tB; tB = s ? bld.getSSA() : i->getDef(0); - bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA); + newi = bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA); if (s) bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s)); @@ -819,7 +858,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) tA = bld.getSSA(); bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, TYPE_S32, i->getSrc(0), bld.mkImm(0)); tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue(); - bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA); + newi = bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA); if (d < 0) bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB); @@ -897,7 +936,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) default: return; } - if (i->op != op) + if (newi->op != op) foldCount++; } -- 2.7.4