From f29ddfe93f52f0185625659108a960fa01cba139 Mon Sep 17 00:00:00 2001 From: Paul Redmond Date: Fri, 15 Feb 2013 18:45:18 +0000 Subject: [PATCH] enable SDISel sincos optimization for GNU environments - add sincos to runtime library if target triple environment is GNU - added canCombineSinCosLibcall() which checks that sincos is in the RTL and, if the environment is GNU, that unsafe fpmath is enabled (required because GNU sin/cos set errno while sincos does not, so the combined call does not preserve errno) - extended sincos-opt lit test Reviewed by: Hal Finkel llvm-svn: 175283 --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 17 ++++++++- llvm/lib/CodeGen/TargetLoweringBase.cpp | 25 +++++++++---- llvm/test/CodeGen/X86/sincos-opt.ll | 54 ++++++++++++++++++++------- 3 files changed, 73 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 6a3e03b..4a0176b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -2111,6 +2112,20 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { return TLI.getLibcallName(LC) != 0; } +/// canCombineSinCosLibcall - Return true if sincos libcall is available and +/// can be used to combine sin and cos. +static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, + const TargetMachine &TM) { + if (!isSinCosLibcallAvailable(Node, TLI)) + return false; + // GNU sin/cos functions set errno while sincos does not. Therefore + // combining sin and cos is only safe if unsafe-fpmath is enabled. 
+ bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU; + if (isGNU && !TM.Options.UnsafeFPMath) + return false; + return true; +} + /// useSinCos - Only issue sincos libcall if both sin and cos are /// needed. static bool useSinCos(SDNode *Node) { @@ -3149,7 +3164,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / // fcos which share the same operand and both are used. if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || - isSinCosLibcallAvailable(Node, TLI)) + canCombineSinCosLibcall(Node, TLI, TM)) && useSinCos(Node)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 10aa9d6..f2329dc 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -14,6 +14,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -34,7 +35,7 @@ using namespace llvm; /// InitLibcallNames - Set default libcall names. /// -static void InitLibcallNames(const char **Names) { +static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SHL_I16] = "__ashlhi3"; Names[RTLIB::SHL_I32] = "__ashlsi3"; Names[RTLIB::SHL_I64] = "__ashldi3"; @@ -341,12 +342,20 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; - // These are generally not available. 
- Names[RTLIB::SINCOS_F32] = 0; - Names[RTLIB::SINCOS_F64] = 0; - Names[RTLIB::SINCOS_F80] = 0; - Names[RTLIB::SINCOS_F128] = 0; - Names[RTLIB::SINCOS_PPCF128] = 0; + if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) { + Names[RTLIB::SINCOS_F32] = "sincosf"; + Names[RTLIB::SINCOS_F64] = "sincos"; + Names[RTLIB::SINCOS_F80] = "sincosl"; + Names[RTLIB::SINCOS_F128] = "sincosl"; + Names[RTLIB::SINCOS_PPCF128] = "sincosl"; + } else { + // These are generally not available. + Names[RTLIB::SINCOS_F32] = 0; + Names[RTLIB::SINCOS_F64] = 0; + Names[RTLIB::SINCOS_F80] = 0; + Names[RTLIB::SINCOS_F128] = 0; + Names[RTLIB::SINCOS_PPCF128] = 0; + } } /// InitLibcallCallingConvs - Set default libcall CallingConvs. @@ -726,7 +735,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, SupportJumpTables = true; MinimumJumpTableEntries = 4; - InitLibcallNames(LibcallRoutineNames); + InitLibcallNames(LibcallRoutineNames, TM); InitCmpLibcallCCs(CmpLibcallCCs); InitLibcallCallingConvs(LibcallCallingConvs); } diff --git a/llvm/test/CodeGen/X86/sincos-opt.ll b/llvm/test/CodeGen/X86/sincos-opt.ll index 65c8417..f364d1f 100644 --- a/llvm/test/CodeGen/X86/sincos-opt.ll +++ b/llvm/test/CodeGen/X86/sincos-opt.ll @@ -1,18 +1,24 @@ -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 -mcpu=core2 | FileCheck %s --check-prefix=SINCOS -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=NOOPT +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9.0 -mcpu=core2 | FileCheck %s --check-prefix=OSX_SINCOS +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=OSX_NOOPT +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu -mcpu=core2 -enable-unsafe-fp-math | FileCheck %s --check-prefix=GNU_SINCOS ; Combine sin / cos into a single call. 
; rdar://13087969 define float @test1(float %x) nounwind { entry: -; SINCOS: test1: -; SINCOS: callq ___sincosf_stret -; SINCOS: addss %xmm1, %xmm0 +; GNU_SINCOS: test1: +; GNU_SINCOS: callq sincosf +; GNU_SINCOS: movss 4(%rsp), %xmm0 +; GNU_SINCOS: addss (%rsp), %xmm0 -; NOOPT: test1 -; NOOPT: callq _cosf -; NOOPT: callq _sinf +; OSX_SINCOS: test1: +; OSX_SINCOS: callq ___sincosf_stret +; OSX_SINCOS: addss %xmm1, %xmm0 + +; OSX_NOOPT: test1 +; OSX_NOOPT: callq _cosf +; OSX_NOOPT: callq _sinf %call = tail call float @sinf(float %x) nounwind readnone %call1 = tail call float @cosf(float %x) nounwind readnone %add = fadd float %call, %call1 @@ -21,20 +27,40 @@ entry: define double @test2(double %x) nounwind { entry: -; SINCOS: test2: -; SINCOS: callq ___sincos_stret -; SINCOS: addsd %xmm1, %xmm0 +; GNU_SINCOS: test2: +; GNU_SINCOS: callq sincos +; GNU_SINCOS: movsd 16(%rsp), %xmm0 +; GNU_SINCOS: addsd 8(%rsp), %xmm0 + +; OSX_SINCOS: test2: +; OSX_SINCOS: callq ___sincos_stret +; OSX_SINCOS: addsd %xmm1, %xmm0 -; NOOPT: test2 -; NOOPT: callq _cos -; NOOPT: callq _sin +; OSX_NOOPT: test2 +; OSX_NOOPT: callq _cos +; OSX_NOOPT: callq _sin %call = tail call double @sin(double %x) nounwind readnone %call1 = tail call double @cos(double %x) nounwind readnone %add = fadd double %call, %call1 ret double %add } +define x86_fp80 @test3(x86_fp80 %x) nounwind { +entry: +; GNU_SINCOS: test3: +; GNU_SINCOS: callq sinl +; GNU_SINCOS: callq cosl +; GNU_SINCOS: ret + %call = tail call x86_fp80 @sinl(x86_fp80 %x) nounwind + %call1 = tail call x86_fp80 @cosl(x86_fp80 %x) nounwind + %add = fadd x86_fp80 %call, %call1 + ret x86_fp80 %add +} + declare float @sinf(float) readonly declare double @sin(double) readonly declare float @cosf(float) readonly declare double @cos(double) readonly + +declare x86_fp80 @sinl(x86_fp80) +declare x86_fp80 @cosl(x86_fp80) -- 2.7.4