From 9dc8c448ed4511b7802d78f78a29c0714868c7b0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 3 Sep 2019 05:57:18 +0000 Subject: [PATCH] [X86] Don't use Expand for i32 fp_to_uint on SSE1/2 targets on 32-bit target. Use Custom lowering instead. Fall back to default expansion only when the scalar FP type belongs in an XMM register. This improves lowering for fp80 to i32, and also double to i32 on SSE1 only. llvm-svn: 370699 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++-- llvm/test/CodeGen/X86/scalar-fp-to-i32.ll | 182 +++++++++--------------------- 2 files changed, 63 insertions(+), 139 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6f0fb3a..b6649da 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -287,19 +287,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); } } else if (!Subtarget.useSoftFloat()) { - // Since AVX is a superset of SSE3, only check for SSE here. - if (Subtarget.hasSSE1() && !Subtarget.hasSSE3()) - // Expand FP_TO_UINT into a select. - // FIXME: We would like to use a Custom expander here eventually to do - // the optimal thing for SSE vs. the default expansion in the legalizer. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); - else - // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom. - // With SSE3 we can use fisttpll to convert to a signed i64; without - // SSE, we're stuck with a fistpll. - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); - - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); } // TODO: when we have SSE, these could be more efficient, by using movd/movq. 
@@ -19425,6 +19414,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { if (UseSSEReg && IsSigned) return Op; + // Use default expansion for SSE1/2 targets without SSE3. With SSE3 we can use + // fisttp. + if (!IsSigned && UseSSEReg && !Subtarget.hasSSE3()) + return SDValue(); + // Fall back to X87. if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned)) return V; diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll index ed85858..4ca20a7 100644 --- a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll +++ b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll @@ -257,74 +257,36 @@ define i32 @d_to_u32(double %a) nounwind { ; ; SSE_32_WIN-LABEL: d_to_u32: ; SSE_32_WIN: # %bb.0: +; SSE_32_WIN-NEXT: pushl %ebp +; SSE_32_WIN-NEXT: movl %esp, %ebp +; SSE_32_WIN-NEXT: andl $-8, %esp ; SSE_32_WIN-NEXT: subl $16, %esp -; SSE_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) -; SSE_32_WIN-NEXT: flds __real@4f000000 -; SSE_32_WIN-NEXT: fld %st(1) -; SSE_32_WIN-NEXT: fsub %st(1), %st +; SSE_32_WIN-NEXT: fldl 8(%ebp) ; SSE_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; SSE_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00 ; SSE_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) ; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; SSE_32_WIN-NEXT: fistpl {{[0-9]+}}(%esp) +; SSE_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp) ; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; SSE_32_WIN-NEXT: fnstcw (%esp) -; SSE_32_WIN-NEXT: movzwl (%esp), %eax -; SSE_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00 -; SSE_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) -; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; SSE_32_WIN-NEXT: fxch %st(1) -; SSE_32_WIN-NEXT: fistl {{[0-9]+}}(%esp) -; SSE_32_WIN-NEXT: fldcw (%esp) -; SSE_32_WIN-NEXT: fxch %st(1) -; SSE_32_WIN-NEXT: fucompi %st(1), %st -; SSE_32_WIN-NEXT: fstp %st(0) -; SSE_32_WIN-NEXT: jbe LBB2_1 -; SSE_32_WIN-NEXT: # %bb.2: ; SSE_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax -; SSE_32_WIN-NEXT: addl $16, %esp -; 
SSE_32_WIN-NEXT: retl -; SSE_32_WIN-NEXT: LBB2_1: -; SSE_32_WIN-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; SSE_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %eax -; SSE_32_WIN-NEXT: addl $16, %esp +; SSE_32_WIN-NEXT: movl %ebp, %esp +; SSE_32_WIN-NEXT: popl %ebp ; SSE_32_WIN-NEXT: retl ; ; SSE_32_LIN-LABEL: d_to_u32: ; SSE_32_LIN: # %bb.0: -; SSE_32_LIN-NEXT: subl $16, %esp +; SSE_32_LIN-NEXT: subl $20, %esp ; SSE_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) -; SSE_32_LIN-NEXT: flds {{\.LCPI.*}} -; SSE_32_LIN-NEXT: fld %st(1) -; SSE_32_LIN-NEXT: fsub %st(1), %st ; SSE_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; SSE_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00 ; SSE_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) ; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; SSE_32_LIN-NEXT: fistpl {{[0-9]+}}(%esp) +; SSE_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; SSE_32_LIN-NEXT: fnstcw (%esp) -; SSE_32_LIN-NEXT: movzwl (%esp), %eax -; SSE_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00 -; SSE_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) -; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; SSE_32_LIN-NEXT: fxch %st(1) -; SSE_32_LIN-NEXT: fistl {{[0-9]+}}(%esp) -; SSE_32_LIN-NEXT: fldcw (%esp) -; SSE_32_LIN-NEXT: fxch %st(1) -; SSE_32_LIN-NEXT: fucompi %st(1), %st -; SSE_32_LIN-NEXT: fstp %st(0) -; SSE_32_LIN-NEXT: jbe .LBB2_1 -; SSE_32_LIN-NEXT: # %bb.2: ; SSE_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax -; SSE_32_LIN-NEXT: addl $16, %esp -; SSE_32_LIN-NEXT: retl -; SSE_32_LIN-NEXT: .LBB2_1: -; SSE_32_LIN-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; SSE_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %eax -; SSE_32_LIN-NEXT: addl $16, %esp +; SSE_32_LIN-NEXT: addl $20, %esp ; SSE_32_LIN-NEXT: retl ; ; X87_WIN-LABEL: d_to_u32: @@ -507,52 +469,36 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind { ; ; SSE2_32_WIN-LABEL: x_to_u32: ; SSE2_32_WIN: # %bb.0: -; SSE2_32_WIN-NEXT: subl $8, %esp -; SSE2_32_WIN-NEXT: fldt {{[0-9]+}}(%esp) -; SSE2_32_WIN-NEXT: 
flds __real@4f000000 -; SSE2_32_WIN-NEXT: fld %st(1) -; SSE2_32_WIN-NEXT: fsub %st(1), %st -; SSE2_32_WIN-NEXT: xorl %eax, %eax -; SSE2_32_WIN-NEXT: fxch %st(1) -; SSE2_32_WIN-NEXT: fucompi %st(2), %st -; SSE2_32_WIN-NEXT: fcmovnbe %st(1), %st -; SSE2_32_WIN-NEXT: fstp %st(1) -; SSE2_32_WIN-NEXT: setbe %al -; SSE2_32_WIN-NEXT: fnstcw (%esp) -; SSE2_32_WIN-NEXT: movzwl (%esp), %ecx -; SSE2_32_WIN-NEXT: orl $3072, %ecx # imm = 0xC00 -; SSE2_32_WIN-NEXT: movw %cx, {{[0-9]+}}(%esp) +; SSE2_32_WIN-NEXT: pushl %ebp +; SSE2_32_WIN-NEXT: movl %esp, %ebp +; SSE2_32_WIN-NEXT: andl $-8, %esp +; SSE2_32_WIN-NEXT: subl $16, %esp +; SSE2_32_WIN-NEXT: fldt 8(%ebp) +; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) +; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; SSE2_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00 +; SSE2_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; SSE2_32_WIN-NEXT: fistpl {{[0-9]+}}(%esp) -; SSE2_32_WIN-NEXT: fldcw (%esp) -; SSE2_32_WIN-NEXT: shll $31, %eax -; SSE2_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %eax -; SSE2_32_WIN-NEXT: addl $8, %esp +; SSE2_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp) +; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE2_32_WIN-NEXT: movl %ebp, %esp +; SSE2_32_WIN-NEXT: popl %ebp ; SSE2_32_WIN-NEXT: retl ; ; SSE2_32_LIN-LABEL: x_to_u32: ; SSE2_32_LIN: # %bb.0: -; SSE2_32_LIN-NEXT: subl $8, %esp +; SSE2_32_LIN-NEXT: subl $20, %esp ; SSE2_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) -; SSE2_32_LIN-NEXT: flds {{\.LCPI.*}} -; SSE2_32_LIN-NEXT: fld %st(1) -; SSE2_32_LIN-NEXT: fsub %st(1), %st -; SSE2_32_LIN-NEXT: xorl %eax, %eax -; SSE2_32_LIN-NEXT: fxch %st(1) -; SSE2_32_LIN-NEXT: fucompi %st(2), %st -; SSE2_32_LIN-NEXT: fcmovnbe %st(1), %st -; SSE2_32_LIN-NEXT: fstp %st(1) -; SSE2_32_LIN-NEXT: setbe %al -; SSE2_32_LIN-NEXT: fnstcw (%esp) -; SSE2_32_LIN-NEXT: movzwl (%esp), %ecx -; SSE2_32_LIN-NEXT: orl $3072, %ecx # imm = 0xC00 -; SSE2_32_LIN-NEXT: movw %cx, {{[0-9]+}}(%esp) 
+; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) +; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; SSE2_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00 +; SSE2_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE2_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; SSE2_32_LIN-NEXT: fistpl {{[0-9]+}}(%esp) -; SSE2_32_LIN-NEXT: fldcw (%esp) -; SSE2_32_LIN-NEXT: shll $31, %eax -; SSE2_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %eax -; SSE2_32_LIN-NEXT: addl $8, %esp +; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE2_32_LIN-NEXT: addl $20, %esp ; SSE2_32_LIN-NEXT: retl ; ; SSE2_64_WIN-LABEL: x_to_u32: @@ -585,52 +531,36 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind { ; ; SSE_32_WIN-LABEL: x_to_u32: ; SSE_32_WIN: # %bb.0: -; SSE_32_WIN-NEXT: subl $8, %esp -; SSE_32_WIN-NEXT: fldt {{[0-9]+}}(%esp) -; SSE_32_WIN-NEXT: flds __real@4f000000 -; SSE_32_WIN-NEXT: fld %st(1) -; SSE_32_WIN-NEXT: fsub %st(1), %st -; SSE_32_WIN-NEXT: xorl %eax, %eax -; SSE_32_WIN-NEXT: fxch %st(1) -; SSE_32_WIN-NEXT: fucompi %st(2), %st -; SSE_32_WIN-NEXT: fcmovnbe %st(1), %st -; SSE_32_WIN-NEXT: fstp %st(1) -; SSE_32_WIN-NEXT: setbe %al -; SSE_32_WIN-NEXT: fnstcw (%esp) -; SSE_32_WIN-NEXT: movzwl (%esp), %ecx -; SSE_32_WIN-NEXT: orl $3072, %ecx # imm = 0xC00 -; SSE_32_WIN-NEXT: movw %cx, {{[0-9]+}}(%esp) +; SSE_32_WIN-NEXT: pushl %ebp +; SSE_32_WIN-NEXT: movl %esp, %ebp +; SSE_32_WIN-NEXT: andl $-8, %esp +; SSE_32_WIN-NEXT: subl $16, %esp +; SSE_32_WIN-NEXT: fldt 8(%ebp) +; SSE_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) +; SSE_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; SSE_32_WIN-NEXT: orl $3072, %eax # imm = 0xC00 +; SSE_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp) ; SSE_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) -; SSE_32_WIN-NEXT: fistpl {{[0-9]+}}(%esp) -; SSE_32_WIN-NEXT: fldcw (%esp) -; SSE_32_WIN-NEXT: shll $31, %eax -; SSE_32_WIN-NEXT: xorl 
{{[0-9]+}}(%esp), %eax -; SSE_32_WIN-NEXT: addl $8, %esp +; SSE_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE_32_WIN-NEXT: movl %ebp, %esp +; SSE_32_WIN-NEXT: popl %ebp ; SSE_32_WIN-NEXT: retl ; ; SSE_32_LIN-LABEL: x_to_u32: ; SSE_32_LIN: # %bb.0: -; SSE_32_LIN-NEXT: subl $8, %esp +; SSE_32_LIN-NEXT: subl $20, %esp ; SSE_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) -; SSE_32_LIN-NEXT: flds {{\.LCPI.*}} -; SSE_32_LIN-NEXT: fld %st(1) -; SSE_32_LIN-NEXT: fsub %st(1), %st -; SSE_32_LIN-NEXT: xorl %eax, %eax -; SSE_32_LIN-NEXT: fxch %st(1) -; SSE_32_LIN-NEXT: fucompi %st(2), %st -; SSE_32_LIN-NEXT: fcmovnbe %st(1), %st -; SSE_32_LIN-NEXT: fstp %st(1) -; SSE_32_LIN-NEXT: setbe %al -; SSE_32_LIN-NEXT: fnstcw (%esp) -; SSE_32_LIN-NEXT: movzwl (%esp), %ecx -; SSE_32_LIN-NEXT: orl $3072, %ecx # imm = 0xC00 -; SSE_32_LIN-NEXT: movw %cx, {{[0-9]+}}(%esp) +; SSE_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) +; SSE_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; SSE_32_LIN-NEXT: orl $3072, %eax # imm = 0xC00 +; SSE_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) +; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) +; SSE_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp) ; SSE_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) -; SSE_32_LIN-NEXT: fistpl {{[0-9]+}}(%esp) -; SSE_32_LIN-NEXT: fldcw (%esp) -; SSE_32_LIN-NEXT: shll $31, %eax -; SSE_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %eax -; SSE_32_LIN-NEXT: addl $8, %esp +; SSE_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE_32_LIN-NEXT: addl $20, %esp ; SSE_32_LIN-NEXT: retl ; ; X87_WIN-LABEL: x_to_u32: -- 2.7.4