From: Juergen Ributzka Date: Fri, 13 Jun 2014 02:21:58 +0000 (+0000) Subject: [FastISel][X86] Add support for cvttss2si/cvttsd2si intrinsics. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3453bcf64d394b109ccab19915ba2fa40feeb59b;p=platform%2Fupstream%2Fllvm.git [FastISel][X86] Add support for cvttss2si/cvttsd2si intrinsics. This adds support for the cvttss2si/cvttsd2si intrinsics. Preceding insertelement instructions are folded into the conversion instruction (if possible). llvm-svn: 210870 --- diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index e041b50..0308b1e 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -1962,6 +1962,72 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { UpdateValueMap(&I, ResultReg, 2); return true; } + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: { + bool IsInputDouble; + switch (I.getIntrinsicID()) { + default: llvm_unreachable("Unexpected intrinsic."); + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + if (!Subtarget->hasSSE1()) + return false; + IsInputDouble = false; + break; + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: + if (!Subtarget->hasSSE2()) + return false; + IsInputDouble = true; + break; + } + + Type *RetTy = I.getCalledFunction()->getReturnType(); + MVT VT; + if (!isTypeLegal(RetTy, VT)) + return false; + + static const unsigned CvtOpc[2][2][2] = { + { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr }, + { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } }, + { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr }, + { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } } + }; + bool HasAVX = Subtarget->hasAVX(); + unsigned Opc; + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected result type."); + case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break; + case 
MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break; + } + + // Check if we can fold insertelement instructions into the convert. + const Value *Op = I.getArgOperand(0); + while (auto *IE = dyn_cast<InsertElementInst>(Op)) { + const Value *Index = IE->getOperand(2); + if (!isa<ConstantInt>(Index)) + break; + unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); + + if (Idx == 0) { + Op = IE->getOperand(1); + break; + } + Op = IE->getOperand(0); + } + + unsigned Reg = getRegForValue(Op); + if (Reg == 0) + return false; + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(Reg); + + UpdateValueMap(&I, ResultReg); + return true; + } } } diff --git a/llvm/test/CodeGen/X86/fast-isel-sse12-fptoint.ll b/llvm/test/CodeGen/X86/fast-isel-sse12-fptoint.ll new file mode 100644 index 0000000..769c987 --- /dev/null +++ b/llvm/test/CodeGen/X86/fast-isel-sse12-fptoint.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=AVX + +define i32 @cvt_test1(float %a) { +; SSE-LABEL: cvt_test1 +; SSE: cvttss2si %xmm0, %eax +; AVX-LABEL: cvt_test1 +; AVX: vcvttss2si %xmm0, %eax + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 1 + %3 = insertelement <4 x float> %2, float 0.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 0.000000e+00, i32 3 + %5 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %4) + ret i32 %5 +} +declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone + +define i64 @cvt_test2(float %a) { +; SSE-LABEL: cvt_test2 +; SSE: cvttss2si %xmm0, %rax +; AVX-LABEL: cvt_test2 +; AVX: vcvttss2si %xmm0, %rax + %1 = insertelement <4 x float> undef, float %a, i32 0 + %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 1 + %3 
= insertelement <4 x float> %2, float 0.000000e+00, i32 2 + %4 = insertelement <4 x float> %3, float 0.000000e+00, i32 3 + %5 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %4) + ret i64 %5 +} +declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone + +define i32 @cvt_test3(double %a) { +; SSE-LABEL: cvt_test3 +; SSE: cvttsd2si %xmm0, %eax +; AVX-LABEL: cvt_test3 +; AVX: vcvttsd2si %xmm0, %eax + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 0.000000e+00, i32 1 + %3 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %2) + ret i32 %3 +} +declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone + +define i64 @cvt_test4(double %a) { +; SSE-LABEL: cvt_test4 +; SSE: cvttsd2si %xmm0, %rax +; AVX-LABEL: cvt_test4 +; AVX: vcvttsd2si %xmm0, %rax + %1 = insertelement <2 x double> undef, double %a, i32 0 + %2 = insertelement <2 x double> %1, double 0.000000e+00, i32 1 + %3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %2) + ret i64 %3 +} +declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone