nv50/ir: fold fma/mad when all 3 args are immediates
authorIlia Mirkin <imirkin@alum.mit.edu>
Thu, 3 Dec 2015 02:02:12 +0000 (21:02 -0500)
committerIlia Mirkin <imirkin@alum.mit.edu>
Fri, 4 Dec 2015 04:02:57 +0000 (23:02 -0500)
This happens pretty rarely, but might as well do it when it does.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp

index bb7f491..b79e465 100644 (file)
@@ -670,6 +670,34 @@ ConstantFolding::expr(Instruction *i,
       res.data.u32 = ((a->data.u32 << offset) & bitmask) | (c->data.u32 & ~bitmask);
       break;
    }
+   case OP_MAD:
+   case OP_FMA: {
+      switch (i->dType) {
+      case TYPE_F32:
+         res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor) +
+            c->data.f32;
+         break;
+      case TYPE_F64:
+         res.data.f64 = a->data.f64 * b->data.f64 + c->data.f64;
+         break;
+      case TYPE_S32:
+         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+            res.data.s32 = ((int64_t)a->data.s32 * b->data.s32 >> 32) + c->data.s32;
+            break;
+         }
+         /* fallthrough */
+      case TYPE_U32:
+         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+            res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32 >> 32) + c->data.u32;
+            break;
+         }
+         res.data.u32 = a->data.u32 * b->data.u32 + c->data.u32;
+         break;
+      default:
+         return;
+      }
+      break;
+   }
    default:
       return;
    }
@@ -684,6 +712,8 @@ ConstantFolding::expr(Instruction *i,
    i->setSrc(2, NULL);
 
    i->getSrc(0)->reg.data = res.data;
+   i->getSrc(0)->reg.type = i->dType;
+   i->getSrc(0)->reg.size = typeSizeof(i->dType);
 
    i->op = OP_MOV;
 }