From: Matt Arsenault
Date: Wed, 18 Jun 2014 17:05:30 +0000 (+0000)
Subject: R600: Implement f64 ftrunc, ffloor and fceil.
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4601093267a8f92ae8f223f3f7f1b6f07d95394d;p=platform%2Fupstream%2Fllvm.git

R600: Implement f64 ftrunc, ffloor and fceil.

CI has instructions for these, so this fixes them for older hardware.

llvm-svn: 211183
---

diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
index e09db64..1e6f38f 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -219,7 +219,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::BR_CC, MVT::i1, Expand);

   if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+    setOperationAction(ISD::FCEIL, MVT::f64, Custom);
+    setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
     setOperationAction(ISD::FRINT, MVT::f64, Custom);
+    setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
   }

   if (!Subtarget->hasBFI()) {
@@ -494,7 +497,10 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
   case ISD::SDIV: return LowerSDIV(Op, DAG);
   case ISD::SREM: return LowerSREM(Op, DAG);
   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+  case ISD::FCEIL: return LowerFCEIL(Op, DAG);
+  case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
   case ISD::FRINT: return LowerFRINT(Op, DAG);
+  case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
   case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);

   // AMDIL DAG lowering.
@@ -1571,6 +1577,84 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
   return DAG.getMergeValues(Ops, DL);
 }

+SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  SDValue Src = Op.getOperand(0);
+
+  // result = trunc(src)
+  // if (src > 0.0 && src != result)
+  //   result += 1.0
+
+  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+  const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
+  const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
+
+  EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+
+  SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
+  SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
+  SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
+
+  SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
+  return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
+}
+
+SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  SDValue Src = Op.getOperand(0);
+
+  assert(Op.getValueType() == MVT::f64);
+
+  const SDValue Zero = DAG.getConstant(0, MVT::i32);
+  const SDValue One = DAG.getConstant(1, MVT::i32);
+
+  SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
+
+  // Extract the upper half, since this is where we will find the sign and
+  // exponent.
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
+
+  const unsigned FractBits = 52;
+  const unsigned ExpBits = 11;
+
+  // Extract the exponent.
+  SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_I32, SL, MVT::i32,
+                                Hi,
+                                DAG.getConstant(FractBits - 32, MVT::i32),
+                                DAG.getConstant(ExpBits, MVT::i32));
+  SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
+                            DAG.getConstant(1023, MVT::i32));
+
+  // Extract the sign bit.
+  const SDValue SignBitMask = DAG.getConstant(1ul << 31, MVT::i32);
+  SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
+
+  // Extend back to 64-bits.
+  SDValue SignBit64 = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+                                  Zero, SignBit);
+  SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
+
+  SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
+  const SDValue FractMask = DAG.getConstant((1L << FractBits) - 1, MVT::i64);
+
+  SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
+  SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
+  SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
+
+  EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
+
+  const SDValue FiftyOne = DAG.getConstant(FractBits - 1, MVT::i32);
+
+  SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
+  SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
+
+  SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
+  SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
+
+  return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
+}
+
 SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
   SDLoc SL(Op);
   SDValue Src = Op.getOperand(0);
@@ -1592,6 +1676,29 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
 }

+SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+  SDValue Src = Op.getOperand(0);
+
+  // result = trunc(src);
+  // if (src < 0.0 && src != result)
+  //   result += -1.0.
+
+  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+  const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
+  const SDValue NegOne = DAG.getConstantFP(-1.0, MVT::f64);
+
+  EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+
+  SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
+  SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
+  SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
+
+  SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
+  return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
+}
+
 SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
                                               SelectionDAG &DAG) const {
   SDValue S0 = Op.getOperand(0);
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.h b/llvm/lib/Target/R600/AMDGPUISelLowering.h
index 9b54022..34e36d8 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.h
@@ -51,7 +51,11 @@ private:
   SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

   SDValue ExpandSIGN_EXTEND_INREG(SDValue Op,
diff --git a/llvm/test/CodeGen/R600/fceil.ll b/llvm/test/CodeGen/R600/fceil.ll
index b8b945f..b42aefa 100644
--- a/llvm/test/CodeGen/R600/fceil.ll
+++ b/llvm/test/CodeGen/R600/fceil.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
+; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

 declare double @llvm.ceil.f64(double) nounwind readnone
 declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
@@ -7,15 +8,33 @@ declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
 declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
 declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone

-; CI-LABEL: @fceil_f64:
-; CI: V_CEIL_F64_e32
+; FUNC-LABEL: @fceil_f64:
+; CI: V_CEIL_F64_e32
+; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: S_LSHR_B64
+; SI: S_NOT_B64
+; SI: S_AND_B64
+; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI: CMP_LT_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: CMP_GT_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: CMP_GT_F64
+; SI: CNDMASK_B32
+; SI: CMP_NE_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: V_ADD_F64
 define void @fceil_f64(double addrspace(1)* %out, double %x) {
   %y = call double @llvm.ceil.f64(double %x) nounwind readnone
   store double %y, double addrspace(1)* %out
   ret void
 }

-; CI-LABEL: @fceil_v2f64:
+; FUNC-LABEL: @fceil_v2f64:
 ; CI: V_CEIL_F64_e32
 ; CI: V_CEIL_F64_e32
 define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
@@ -24,7 +43,7 @@ define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
   ret void
 }

-; FIXME-CI-LABEL: @fceil_v3f64:
+; FIXME-FUNC-LABEL: @fceil_v3f64:
 ; FIXME-CI: V_CEIL_F64_e32
 ; FIXME-CI: V_CEIL_F64_e32
 ; FIXME-CI: V_CEIL_F64_e32
@@ -34,7 +53,7 @@ define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
 ; ret void
 ; }

-; CI-LABEL: @fceil_v4f64:
+; FUNC-LABEL: @fceil_v4f64:
 ; CI: V_CEIL_F64_e32
 ; CI: V_CEIL_F64_e32
 ; CI: V_CEIL_F64_e32
@@ -45,7 +64,7 @@ define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
   ret void
 }

-; CI-LABEL: @fceil_v8f64:
+; FUNC-LABEL: @fceil_v8f64:
 ; CI: V_CEIL_F64_e32
 ; CI: V_CEIL_F64_e32
 ; CI: V_CEIL_F64_e32
@@ -60,7 +79,7 @@ define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
   ret void
 }

-; CI-LABEL: @fceil_v16f64:
+; FUNC-LABEL: @fceil_v16f64:
 ; CI: V_CEIL_F64_e32
 ; CI: V_CEIL_F64_e32
 ; CI: V_CEIL_F64_e32
diff --git a/llvm/test/CodeGen/R600/ffloor.ll b/llvm/test/CodeGen/R600/ffloor.ll
index 51d2b89..31c6116 100644
--- a/llvm/test/CodeGen/R600/ffloor.ll
+++ b/llvm/test/CodeGen/R600/ffloor.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
+; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

 declare double @llvm.floor.f64(double) nounwind readnone
 declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
@@ -7,15 +8,34 @@ declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
 declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
 declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone

-; CI-LABEL: @ffloor_f64:
+; FUNC-LABEL: @ffloor_f64:
 ; CI: V_FLOOR_F64_e32
+
+; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: S_LSHR_B64
+; SI: S_NOT_B64
+; SI: S_AND_B64
+; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI: CMP_LT_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: CMP_GT_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: CMP_LT_F64
+; SI: CNDMASK_B32
+; SI: CMP_NE_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: V_ADD_F64
 define void @ffloor_f64(double addrspace(1)* %out, double %x) {
   %y = call double @llvm.floor.f64(double %x) nounwind readnone
   store double %y, double addrspace(1)* %out
   ret void
 }

-; CI-LABEL: @ffloor_v2f64:
+; FUNC-LABEL: @ffloor_v2f64:
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
 define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
@@ -24,7 +44,7 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
   ret void
 }

-; FIXME-CI-LABEL: @ffloor_v3f64:
+; FIXME-FUNC-LABEL: @ffloor_v3f64:
 ; FIXME-CI: V_FLOOR_F64_e32
 ; FIXME-CI: V_FLOOR_F64_e32
 ; FIXME-CI: V_FLOOR_F64_e32
@@ -34,7 +54,7 @@ define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
 ; ret void
 ; }

-; CI-LABEL: @ffloor_v4f64:
+; FUNC-LABEL: @ffloor_v4f64:
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
@@ -45,7 +65,7 @@ define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
   ret void
 }

-; CI-LABEL: @ffloor_v8f64:
+; FUNC-LABEL: @ffloor_v8f64:
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
@@ -60,7 +80,7 @@ define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
   ret void
 }

-; CI-LABEL: @ffloor_v16f64:
+; FUNC-LABEL: @ffloor_v16f64:
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
 ; CI: V_FLOOR_F64_e32
diff --git a/llvm/test/CodeGen/R600/ftrunc.ll b/llvm/test/CodeGen/R600/ftrunc.ll
index 6b235ff..3cd1deb 100644
--- a/llvm/test/CodeGen/R600/ftrunc.ll
+++ b/llvm/test/CodeGen/R600/ftrunc.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI %s
+; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

 declare double @llvm.trunc.f64(double) nounwind readnone
 declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
@@ -7,15 +8,40 @@ declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
 declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone
 declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone

-; CI-LABEL: @ftrunc_f64:
+; FUNC-LABEL: @v_ftrunc_f64:
 ; CI: V_TRUNC_F64_e32
+; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
+; SI: S_ENDPGM
+define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+  %x = load double addrspace(1)* %in, align 8
+  %y = call double @llvm.trunc.f64(double %x) nounwind readnone
+  store double %y, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @ftrunc_f64:
+; CI: V_TRUNC_F64_e32
+
+; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: S_LSHR_B64
+; SI: S_NOT_B64
+; SI: S_AND_B64
+; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI: CMP_LT_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: CMP_GT_I32
+; SI: CNDMASK_B32
+; SI: CNDMASK_B32
+; SI: S_ENDPGM
 define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
   %y = call double @llvm.trunc.f64(double %x) nounwind readnone
   store double %y, double addrspace(1)* %out
   ret void
 }

-; CI-LABEL: @ftrunc_v2f64:
+; FUNC-LABEL: @ftrunc_v2f64:
 ; CI: V_TRUNC_F64_e32
 ; CI: V_TRUNC_F64_e32
 define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
@@ -24,7 +50,7 @@ define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
   ret void
 }

-; FIXME-CI-LABEL: @ftrunc_v3f64:
+; FIXME-FUNC-LABEL: @ftrunc_v3f64:
 ; FIXME-CI: V_TRUNC_F64_e32
 ; FIXME-CI: V_TRUNC_F64_e32
 ; FIXME-CI: V_TRUNC_F64_e32
@@ -34,7 +60,7 @@ define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
 ; ret void
 ; }

-; CI-LABEL: @ftrunc_v4f64:
+; FUNC-LABEL: @ftrunc_v4f64:
 ; CI: V_TRUNC_F64_e32
 ; CI: V_TRUNC_F64_e32
 ; CI: V_TRUNC_F64_e32
@@ -45,7 +71,7 @@ define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
   ret void
 }

-; CI-LABEL: @ftrunc_v8f64:
+; FUNC-LABEL: @ftrunc_v8f64:
 ; CI: V_TRUNC_F64_e32
 ; CI: V_TRUNC_F64_e32
 ; CI: V_TRUNC_F64_e32
@@ -60,7 +86,7 @@ define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
   ret void
 }

-; CI-LABEL: @ftrunc_v16f64:
+; FUNC-LABEL: @ftrunc_v16f64:
 ; CI: V_TRUNC_F64_e32
 ; CI: V_TRUNC_F64_e32
 ; CI: V_TRUNC_F64_e32
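
For reference, the scalar logic behind the new lowerings can be sketched in plain C++. This is only an illustration, not part of the commit: the ref_* helpers are hypothetical names, the patch itself builds the equivalent SelectionDAG nodes (BFE, shift, mask, select, FADD) rather than operating on host doubles, and its signed exponent extract may handle Inf/NaN differently than this sketch does.

#include <cstdint>
#include <cstring>

// Truncate toward zero by clearing the fraction bits that lie below the
// binary point, selected via the unbiased exponent; this mirrors the
// FractMask >> Exp masking done in LowerFTRUNC.
static double ref_trunc(double x) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof(bits));

  const unsigned FractBits = 52;
  int exp = static_cast<int>((bits >> FractBits) & 0x7ff) - 1023;
  uint64_t sign = bits & (UINT64_C(1) << 63);

  if (exp < 0) {
    bits = sign;                                 // |x| < 1.0: result is +/-0.0.
  } else if (exp <= 51) {
    uint64_t fractMask = (UINT64_C(1) << FractBits) - 1;
    bits &= ~(fractMask >> exp);                 // Drop the sub-integer bits.
  }
  // exp > 51: the value is already integral; leave the bits alone.

  double r;
  std::memcpy(&r, &bits, sizeof(r));
  return r;
}

// Floor and ceil are built on trunc with a conditional -1.0 / +1.0, exactly
// as the comments in LowerFFLOOR / LowerFCEIL describe.
static double ref_floor(double x) {
  double t = ref_trunc(x);
  return t + ((x < 0.0 && x != t) ? -1.0 : 0.0);
}

static double ref_ceil(double x) {
  double t = ref_trunc(x);
  return t + ((x > 0.0 && x != t) ? 1.0 : 0.0);
}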