nv50/ir: add optimization for modulo by a non-power-of-2 value
author Ilia Mirkin <imirkin@alum.mit.edu>
Sat, 11 Nov 2017 03:10:46 +0000 (22:10 -0500)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Sun, 26 Nov 2017 06:10:03 +0000 (01:10 -0500)
We can still use the optimized division methods which make use of
multiplication with overflow.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp

index 2448c73..cfd623e 100644 (file)
@@ -1192,6 +1192,21 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
 
             delete_Instruction(prog, i);
          }
+      } else if (s == 1) {
+         // In this case, we still want the optimized lowering that we get
+         // from having division by an immediate.
+         //
+         // a % b == a - (a/b) * b
+         bld.setPosition(i, false);
+         Value *div = bld.mkOp2v(OP_DIV, i->sType, bld.getSSA(),
+                                 i->getSrc(0), i->getSrc(1));
+         newi = bld.mkOp2(OP_ADD, i->sType, i->getDef(0), i->getSrc(0),
+                          bld.mkOp2v(OP_MUL, i->sType, bld.getSSA(), div, i->getSrc(1)));
+         // TODO: Check that target supports this. In this case, we know that
+         // all backends do.
+         newi->src(1).mod = Modifier(NV50_IR_MOD_NEG);
+
+         delete_Instruction(prog, i);
       }
       break;