From 272b570a80d9028ab1c8e2cc408148c2c30bb117 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Wed, 11 Jun 2014 23:11:02 +0000 Subject: [PATCH] [FastISel][X86] Add support for the sqrt intrinsic. llvm-svn: 210720 --- llvm/lib/Target/X86/X86FastISel.cpp | 52 +++++++++++++++++++++++++++++++++++++ llvm/test/CodeGen/X86/sqrt.ll | 26 +++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 llvm/test/CodeGen/X86/sqrt.ll diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 2391984..329a96e 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -1781,6 +1781,58 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP)); return true; } + case Intrinsic::sqrt: { + if (!Subtarget->hasSSE1()) + return false; + + Type *RetTy = I.getCalledFunction()->getReturnType(); + + MVT VT; + if (!isTypeLegal(RetTy, VT)) + return false; + + // Unfortunately we can't use FastEmit_r, because the AVX version of FSQRT + // is not generated by FastISel yet. + // FIXME: Update this code once tablegen can handle it. 
+ static const unsigned SqrtOpc[2][2] = { + {X86::SQRTSSr, X86::VSQRTSSr}, + {X86::SQRTSDr, X86::VSQRTSDr} + }; + bool HasAVX = Subtarget->hasAVX(); + unsigned Opc; + const TargetRegisterClass *RC; + switch (VT.SimpleTy) { + default: return false; + case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break; + case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break; + } + + const Value *SrcVal = I.getArgOperand(0); + unsigned SrcReg = getRegForValue(SrcVal); + + if (SrcReg == 0) + return false; + + unsigned ImplicitDefReg = 0; + if (HasAVX) { + ImplicitDefReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); + } + + unsigned ResultReg = createResultReg(RC); + MachineInstrBuilder MIB; + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + ResultReg); + + if (ImplicitDefReg) + MIB.addReg(ImplicitDefReg); + + MIB.addReg(SrcReg); + + UpdateValueMap(&I, ResultReg); + return true; + } case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: diff --git a/llvm/test/CodeGen/X86/sqrt.ll b/llvm/test/CodeGen/X86/sqrt.ll new file mode 100644 index 0000000..be7c6e8 --- /dev/null +++ b/llvm/test/CodeGen/X86/sqrt.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=AVX + +define float @test_sqrt_f32(float %a) { +; SSE2-LABEL: test_sqrt_f32 +; SSE2: sqrtss %xmm0, %xmm0 +; AVX-LABEL: test_sqrt_f32 +; AVX: vsqrtss %xmm0, %xmm0 + %res = call float @llvm.sqrt.f32(float %a) + ret float %res +} +declare 
float @llvm.sqrt.f32(float) nounwind readnone + +define double @test_sqrt_f64(double %a) { +; SSE2-LABEL: test_sqrt_f64 +; SSE2: sqrtsd %xmm0, %xmm0 +; AVX-LABEL: test_sqrt_f64 +; AVX: vsqrtsd %xmm0, %xmm0 + %res = call double @llvm.sqrt.f64(double %a) + ret double %res +} +declare double @llvm.sqrt.f64(double) nounwind readnone + + -- 2.7.4