[ARM] Support float literals under XO

author Christof Douma <Christof.Douma@arm.com>

Fri, 23 Mar 2018 13:02:03 +0000 (13:02 +0000)

committer Christof Douma <Christof.Douma@arm.com>

Fri, 23 Mar 2018 13:02:03 +0000 (13:02 +0000)
author Christof Douma <Christof.Douma@arm.com>
Fri, 23 Mar 2018 13:02:03 +0000 (13:02 +0000)
committer Christof Douma <Christof.Douma@arm.com>
Fri, 23 Mar 2018 13:02:03 +0000 (13:02 +0000)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp

index 762da041244ddf94d44631faaa25ae0762c75fc1..414ade7fa6faee58136bf3c0af92328b28f45fc1 100644 (file)
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1283,6 +1283,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
    case ARMISD::VMOVhr:        return "ARMISD::VMOVhr";
    case ARMISD::VMOVrh:        return "ARMISD::VMOVrh";
+  case ARMISD::VMOVSR:        return "ARMISD::VMOVSR";
  
    case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
    case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
@@ -4518,9 +4519,10 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
    bool InvalidOnQNaN;
    FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
  
-  // Try to generate VMAXNM/VMINNM on ARMv8.
-  if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
-                                  TrueVal.getValueType() == MVT::f64)) {
+  // Try to generate VMAXNM/VMINNM on ARMv8, except when comparing against zero.
+  // This ensures we use CMPFPw0 instead of CMPFP in that case.
+  if (Subtarget->hasFPARMv8() && !isFloatingPointZero(RHS) &&
+    (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) {
      bool swpCmpOps = false;
      bool swpVselOps = false;
      checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
@@ -5942,23 +5944,34 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
  
  SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
                                             const ARMSubtarget *ST) const {
-  bool IsDouble = Op.getValueType() == MVT::f64;
+  EVT VT = Op.getValueType();
+  bool IsDouble = (VT == MVT::f64);
    ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
    const APFloat &FPVal = CFP->getValueAPF();
  
    // Prevent floating-point constants from using literal loads
    // when execute-only is enabled.
    if (ST->genExecuteOnly()) {
+    // If we can represent the constant as an immediate, don't lower it
+    if (isFPImmLegal(FPVal, VT))
+      return Op;
+    // Otherwise, construct it as an integer and move it to a float register.
      APInt INTVal = FPVal.bitcastToAPInt();
      SDLoc DL(CFP);
-    if (IsDouble) {
-      SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
-      SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
-      if (!ST->isLittle())
-        std::swap(Lo, Hi);
-      return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
-    } else {
-      return DAG.getConstant(INTVal, DL, MVT::i32);
+    switch (VT.getSimpleVT().SimpleTy) {
+      default:
+        llvm_unreachable("Unknown floating point type!");
+        break;
+      case MVT::f64: {
+        SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
+        SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
+        if (!ST->isLittle())
+          std::swap(Lo, Hi);
+        return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
+      }
+      case MVT::f32:
+          return DAG.getNode(ARMISD::VMOVSR, DL, VT,
+              DAG.getConstant(INTVal, DL, MVT::i32));
      }
    }
  
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h

index b196e2344ea62b384d12c86c6767c679cbe32a4e..d3d3ac29c671ccd1f798819ea744fee50037ad71 100644 (file)
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -102,6 +102,7 @@ class VectorType;
  
        VMOVRRD,      // double to two gprs.
        VMOVDRR,      // Two gprs to double.
+      VMOVSR,       // Move gpr to single; used for an f32 literal constructed in a gpr.
  
        EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
        EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td

index 8731a7fdce91ebbc368c724401e3dae4410a0a1b..e6a7730d467bbf4074f792f105e0df74f50615d2 100644 (file)
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -17,11 +17,14 @@ def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
  def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, f64>]>;
  
+def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
+
  def arm_fmstat : SDNode<"ARMISD::FMSTAT",  SDTNone, [SDNPInGlue, SDNPOutGlue]>;
  def arm_cmpfp  : SDNode<"ARMISD::CMPFP",   SDT_ARMFCmp, [SDNPOutGlue]>;
  def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
  def arm_fmdrr  : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
  def arm_fmrrd  : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
+def arm_vmovsr  : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>;
  
  def SDT_VMOVhr : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, i32>] >;
  def SDT_VMOVrh : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisFP<1>] >;
@@ -1066,6 +1069,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
    // pipelines.
    let D = VFPNeonDomain;
  }
+def : Pat<(arm_vmovsr GPR:$Rt), (VMOVSR GPR:$Rt)>;
  
  let hasSideEffects = 0 in {
  def VMOVRRD  : AVConv3I<0b11000101, 0b1011,
diff --git a/llvm/test/CodeGen/ARM/fcmp-xo.ll b/llvm/test/CodeGen/ARM/fcmp-xo.ll

new file mode 100644 (file)

index 0000000..7f5bca8
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/fcmp-xo.ll
@@ -0,0 +1,118 @@
+; RUN: llc -mtriple=thumbv7m-arm-none-eabi -mattr=+execute-only,+fp-armv8 %s -o - | FileCheck %s
+
+; This function used to run into a code selection error on fp-armv8 due to
+; different ordering of the constant arguments of fcmp. Fixed by extending the
+; code selection to handle the missing case.
+define arm_aapcs_vfpcc void @foo0() local_unnamed_addr {
+  br i1 undef, label %.end, label %1
+
+  %2 = fcmp nsz olt float undef, 0.000000e+00
+  %3 = select i1 %2, float -5.000000e-01, float 5.000000e-01
+  %4 = fadd nsz float undef, %3
+  %5 = fptosi float %4 to i32
+  %6 = ashr i32 %5, 4
+  %7 = icmp slt i32 %6, 0
+  br i1 %7, label %8, label %.end
+
+  tail call arm_aapcs_vfpcc void @bar()
+  br label %.end
+
+.end:
+  ret void
+}
+; CHECK-LABEL: foo0
+; CHECK: vcmpe.f32 {{s[0-9]+}}, #0
+
+
+define arm_aapcs_vfpcc void @float1() local_unnamed_addr {
+  br i1 undef, label %.end, label %1
+
+  %2 = fcmp nsz olt float undef, 1.000000e+00
+  %3 = select i1 %2, float -5.000000e-01, float 5.000000e-01
+  %4 = fadd nsz float undef, %3
+  %5 = fptosi float %4 to i32
+  %6 = ashr i32 %5, 4
+  %7 = icmp slt i32 %6, 0
+  br i1 %7, label %8, label %.end
+
+  tail call arm_aapcs_vfpcc void @bar()
+  br label %.end
+
+.end:
+  ret void
+}
+; CHECK-LABEL: float1
+; CHECK: vmov.f32 [[FPREG:s[0-9]+]], #1.000000e+00
+; CHECK: vcmpe.f32 [[FPREG]], {{s[0-9]+}}
+
+define arm_aapcs_vfpcc void @float128() local_unnamed_addr {
+  br i1 undef, label %.end, label %1
+
+  %2 = fcmp nsz olt float undef, 128.000000e+00
+  %3 = select i1 %2, float -5.000000e-01, float 5.000000e-01
+  %4 = fadd nsz float undef, %3
+  %5 = fptosi float %4 to i32
+  %6 = ashr i32 %5, 4
+  %7 = icmp slt i32 %6, 0
+  br i1 %7, label %8, label %.end
+
+  tail call arm_aapcs_vfpcc void @bar()
+  br label %.end
+
+.end:
+  ret void
+}
+; CHECK-LABEL: float128
+; CHECK: mov.w [[REG:r[0-9]+]], #1124073472
+; CHECK: vmov [[FPREG:s[0-9]+]], [[REG]]
+; CHECK: vcmpe.f32 [[FPREG]], {{s[0-9]+}}
+
+
+define arm_aapcs_vfpcc void @double1() local_unnamed_addr {
+  br i1 undef, label %.end, label %1
+
+  %2 = fcmp nsz olt double undef, 1.000000e+00
+  %3 = select i1 %2, double -5.000000e-01, double 5.000000e-01
+  %4 = fadd nsz double undef, %3
+  %5 = fptosi double %4 to i32
+  %6 = ashr i32 %5, 4
+  %7 = icmp slt i32 %6, 0
+  br i1 %7, label %8, label %.end
+
+  tail call arm_aapcs_vfpcc void @bar()
+  br label %.end
+
+.end:
+  ret void
+}
+; CHECK-LABEL: double1
+; CHECK: vmov.f64 [[FPREG:d[0-9]+]], #1.000000e+00
+; CHECK: vcmpe.f64 [[FPREG]], {{d[0-9]+}}
+
+define arm_aapcs_vfpcc void @double128() local_unnamed_addr {
+  br i1 undef, label %.end, label %1
+
+  %2 = fcmp nsz olt double undef, 128.000000e+00
+  %3 = select i1 %2, double -5.000000e-01, double 5.000000e-01
+  %4 = fadd nsz double undef, %3
+  %5 = fptosi double %4 to i32
+  %6 = ashr i32 %5, 4
+  %7 = icmp slt i32 %6, 0
+  br i1 %7, label %8, label %.end
+
+  tail call arm_aapcs_vfpcc void @bar()
+  br label %.end
+
+.end:
+  ret void
+}
+; CHECK-LABEL: double128
+; CHECK: movs [[REGL:r[0-9]+]], #0
+; CHECK: movs [[REGH:r[0-9]+]], #0
+; CHECK: movt [[REGH]], #16480
+; CHECK: vmov [[FPREG:d[0-9]+]], [[REGL]], [[REGH]]
+; CHECK: vcmpe.f64 [[FPREG]], {{d[0-9]+}}
+
+
+declare arm_aapcs_vfpcc void @bar() local_unnamed_addr
+
author	Christof Douma <Christof.Douma@arm.com>
	Fri, 23 Mar 2018 13:02:03 +0000 (13:02 +0000)
committer	Christof Douma <Christof.Douma@arm.com>
	Fri, 23 Mar 2018 13:02:03 +0000 (13:02 +0000)
llvm/lib/Target/ARM/ARMISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/ARM/ARMISelLowering.h		patch \| blob \| history
llvm/lib/Target/ARM/ARMInstrVFP.td		patch \| blob \| history
llvm/test/CodeGen/ARM/fcmp-xo.ll	[new file with mode: 0644]	patch \| blob