Do not emit intermediate register for zero FP immediate
author Renato Golin <renato.golin@linaro.org>
Thu, 23 Oct 2014 15:31:50 +0000 (15:31 +0000)
committer Renato Golin <renato.golin@linaro.org>
Thu, 23 Oct 2014 15:31:50 +0000 (15:31 +0000)
This updates the check for a double-precision floating-point zero constant to
allow use of an instruction with an immediate value rather than a temporary register.
Currently, "a == 0.0", where "a" is of type "double", generates:

vmov.i32        d16, #0x0
vcmpe.f64       d0, d16

With this change it becomes:

vcmpe.f64        d0, #0

Patch by Sergey Dmitrouk.

llvm-svn: 220486

llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/ARM/fpcmp-f64-neon-opt.ll [new file with mode: 0644]

index 7df1383..680b497 100644 (file)
@@ -3245,6 +3245,18 @@ static bool isFloatingPointZero(SDValue Op) {
         if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
           return CFP->getValueAPF().isPosZero();
     }
+  } else if (Op->getOpcode() == ISD::BITCAST &&
+             Op->getValueType(0) == MVT::f64) {
+    // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
+    // created by LowerConstantFP().
+    SDValue BitcastOp = Op->getOperand(0);
+    if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {
+      SDValue MoveOp = BitcastOp->getOperand(0);
+      if (MoveOp->getOpcode() == ISD::TargetConstant &&
+          cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {
+        return true;
+      }
+    }
   }
   return false;
 }
diff --git a/llvm/test/CodeGen/ARM/fpcmp-f64-neon-opt.ll b/llvm/test/CodeGen/ARM/fpcmp-f64-neon-opt.ll
new file mode 100644 (file)
index 0000000..7444a68
--- /dev/null
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=linux-arm-gnueabihf -mattr=+neon %s -o - | FileCheck %s
+
+; Check that no intermediate integer register is used.
+define i32 @no-intermediate-register-for-zero-imm(double %x) #0 {
+entry:
+; CHECK-LABEL: no-intermediate-register-for-zero-imm
+; CHECK-NOT: vmov
+; CHECK: vcmp
+  %cmp = fcmp une double %x, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}