From 4e80b584fe2728a2657f0c338d99f7cde64777ec Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Fri, 18 Jul 2014 13:01:19 +0000
Subject: [PATCH] ARM: support legalisation of "fptrunc ... to half"
 operations.

llvm-svn: 213373
---
 .../CodeGen/SelectionDAG/LegalizeFloatTypes.cpp    | 23 ++++++++++
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h      |  1 +
 llvm/lib/Target/ARM/ARMISelLowering.cpp            | 10 +++--
 llvm/test/CodeGen/ARM/half.ll                      | 50 +++++++++++++++++++++-
 4 files changed, 80 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 20ec3ac..e3c034c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -397,6 +397,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
 SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue Op = N->getOperand(0);
+  if (N->getValueType(0) == MVT::f16) {
+    // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a
+    // storage-only type get a chance to select things.
+    return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op);
+  }
+
   RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
   return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first;
@@ -632,6 +638,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
 
   case ISD::BITCAST:     Res = SoftenFloatOp_BITCAST(N); break;
   case ISD::BR_CC:       Res = SoftenFloatOp_BR_CC(N); break;
+  case ISD::FP_EXTEND:   Res = SoftenFloatOp_FP_EXTEND(N); break;
   case ISD::FP_TO_FP16:  // Same as FP_ROUND for softening purposes
   case ISD::FP_ROUND:    Res = SoftenFloatOp_FP_ROUND(N); break;
   case ISD::FP_TO_SINT:  Res = SoftenFloatOp_FP_TO_SINT(N); break;
@@ -661,6 +668,22 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
                      GetSoftenedFloat(N->getOperand(0)));
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
+  // If we get here, the result must be legal but the source illegal.
+  EVT SVT = N->getOperand(0).getValueType();
+  EVT RVT = N->getValueType(0);
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+
+  if (SVT == MVT::f16)
+    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op);
+
+  RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
+
+  return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first;
+}
+
+
 SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
   // We actually deal with the partially-softened FP_TO_FP16 node too, which
   // returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 63c74a0..117ff31e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -425,6 +425,7 @@ private:
   bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
   SDValue SoftenFloatOp_BITCAST(SDNode *N);
   SDValue SoftenFloatOp_BR_CC(SDNode *N);
+  SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
   SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
   SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
   SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 27048f9..f6f937a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -396,8 +396,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
     if (!Subtarget->isFPOnlySP())
       addRegisterClass(MVT::f64, &ARM::DPRRegClass);
-
-    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   }
 
   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -582,8 +580,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 
   computeRegisterProperties();
 
-  // ARM does not have f32 extending load.
+  // ARM does not have floating-point extending loads.
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+
+  // ... or truncating stores
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
 
   // ARM does not have i1 sign extending load.
   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
diff --git a/llvm/test/CodeGen/ARM/half.ll b/llvm/test/CodeGen/ARM/half.ll
index 72da1a5..10cebb3 100644
--- a/llvm/test/CodeGen/ARM/half.ll
+++ b/llvm/test/CodeGen/ARM/half.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=thumbv7s-apple-ios7.0 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-OLD
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios7.0 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-F16
+; RUN: llc < %s -mtriple=thumbv8-apple-ios7.0 | FileCheck %s --check-prefix=CHECK  --check-prefix=CHECK-V8
 
 define void @test_load_store(half* %in, half* %out) {
 ; CHECK-LABEL: test_load_store:
@@ -24,3 +26,49 @@ define void @test_bitcast_to_half(half* %addr, i16 %in) {
   store half %val_fp, half* %addr
   ret void
 }
+
+define float @test_extend32(half* %addr) {
+; CHECK-LABEL: test_extend32:
+
+; CHECK-OLD: b.w ___gnu_h2f_ieee
+; CHECK-F16: vcvtb.f32.f16
+; CHECK-V8: vcvtb.f32.f16
+  %val16 = load half* %addr
+  %val32 = fpext half %val16 to float
+  ret float %val32
+}
+
+define double @test_extend64(half* %addr) {
+; CHECK-LABEL: test_extend64:
+
+; CHECK-OLD: blx ___gnu_h2f_ieee
+; CHECK-OLD: vcvt.f64.f32
+; CHECK-F16: vcvtb.f32.f16
+; CHECK-F16: vcvt.f64.f32
+; CHECK-V8: vcvtb.f64.f16
+  %val16 = load half* %addr
+  %val32 = fpext half %val16 to double
+  ret double %val32
+}
+
+define void @test_trunc32(float %in, half* %addr) {
+; CHECK-LABEL: test_trunc32:
+
+; CHECK-OLD: blx ___gnu_f2h_ieee
+; CHECK-F16: vcvtb.f16.f32
+; CHECK-V8: vcvtb.f16.f32
+  %val16 = fptrunc float %in to half
+  store half %val16, half* %addr
+  ret void
+}
+
+define void @test_trunc64(double %in, half* %addr) {
+; CHECK-LABEL: test_trunc64:
+
+; CHECK-OLD: blx ___truncdfhf2
+; CHECK-F16: blx ___truncdfhf2
+; CHECK-V8: vcvtb.f16.f64
+  %val16 = fptrunc double %in to half
+  store half %val16, half* %addr
+  ret void
+}
-- 
2.7.4