From: Rhys Perry <pendingchaos02@gmail.com>
Date: Sat, 18 Aug 2018 14:06:50 +0000 (+0100)
Subject: nv50/ir: optimize multiplication by 16-bit immediates into two xmads
X-Git-Tag: upstream/19.0.0~2915
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d27c7918916cdc8092959124955f887592e37d72;p=platform%2Fupstream%2Fmesa.git

nv50/ir: optimize multiplication by 16-bit immediates into two xmads

Rather than the usual three that would be created.

total instructions in shared programs : 5796385 -> 5786560 (-0.17%)
total gprs used in shared programs    : 670103 -> 669968 (-0.02%)
total shared used in shared programs  : 548832 -> 548832 (0.00%)
total local used in shared programs   : 21164 -> 21068 (-0.45%)

                local     shared        gpr       inst      bytes
    helped           1           0          64        1040        1040
      hurt           0           0          27           0           0

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
---

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 1ab7437..ecb4bae 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -997,6 +997,16 @@ ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *
       return true;
    }
 
+   if (typeSizeof(ty) == 4 && b >= 0 && b <= 0xffff &&
+       target->isOpSupported(OP_XMAD, TYPE_U32)) {
+      Value *tmp = bld.mkOp3v(OP_XMAD, TYPE_U32, bld.getSSA(),
+                              a, bld.mkImm((uint32_t)b), c ? c : bld.mkImm(0));
+      bld.mkOp3(OP_XMAD, TYPE_U32, def, a, bld.mkImm((uint32_t)b), tmp)->subOp =
+         NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_H1(0);
+
+      return true;
+   }
+
    return false;
 }