From 9dc8c448ed4511b7802d78f78a29c0714868c7b0 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Tue, 3 Sep 2019 05:57:18 +0000
Subject: [PATCH] [X86] Don't use Expand for i32 fp_to_uint on SSE1/2 targets
 on 32-bit target.

Use Custom lowering instead. Fall back to default expansion only
when the scalar FP type belongs in an XMM register. This improves
lowering for fp80 to i32, and also double to i32 on SSE1-only targets.

llvm-svn: 370699
---
 llvm/lib/Target/X86/X86ISelLowering.cpp   |  20 ++--
 llvm/test/CodeGen/X86/scalar-fp-to-i32.ll | 182 +++++++++---------------------
 2 files changed, 63 insertions(+), 139 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6f0fb3a..b6649da 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -287,19 +287,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FP_TO_UINT   , MVT::i64  , Expand);
     }
   } else if (!Subtarget.useSoftFloat()) {
-    // Since AVX is a superset of SSE3, only check for SSE here.
-    if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
-      // Expand FP_TO_UINT into a select.
-      // FIXME: We would like to use a Custom expander here eventually to do
-      // the optimal thing for SSE vs. the default expansion in the legalizer.
-      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
-    else
-      // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
-      // With SSE3 we can use fisttpll to convert to a signed i64; without
-      // SSE, we're stuck with a fistpll.
-      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
-
-    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Custom);
+    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
   }
 
   // TODO: when we have SSE, these could be more efficient, by using movd/movq.
@@ -19425,6 +19414,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
   if (UseSSEReg && IsSigned)
     return Op;
 
+  // Use default expansion for SSE1/2 targets without SSE3. With SSE3 we can use
+  // fisttp.
+  if (!IsSigned && UseSSEReg && !Subtarget.hasSSE3())
+    return SDValue();
+
   // Fall back to X87.
   if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned))
     return V;
diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
index ed85858..4ca20a7 100644
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
@@ -257,74 +257,36 @@ define i32 @d_to_u32(double %a) nounwind {
 ;
 ; SSE_32_WIN-LABEL: d_to_u32:
 ; SSE_32_WIN:       # %bb.0:
+; SSE_32_WIN-NEXT:    pushl %ebp
+; SSE_32_WIN-NEXT:    movl %esp, %ebp
+; SSE_32_WIN-NEXT:    andl $-8, %esp
 ; SSE_32_WIN-NEXT:    subl $16, %esp
-; SSE_32_WIN-NEXT:    fldl {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    flds __real@4f000000
-; SSE_32_WIN-NEXT:    fld %st(1)
-; SSE_32_WIN-NEXT:    fsub %st(1), %st
+; SSE_32_WIN-NEXT:    fldl 8(%ebp)
 ; SSE_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; SSE_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; SSE_32_WIN-NEXT:    orl $3072, %eax # imm = 0xC00
 ; SSE_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; SSE_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fistpl {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; SSE_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fnstcw (%esp)
-; SSE_32_WIN-NEXT:    movzwl (%esp), %eax
-; SSE_32_WIN-NEXT:    orl $3072, %eax # imm = 0xC00
-; SSE_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fxch %st(1)
-; SSE_32_WIN-NEXT:    fistl {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fldcw (%esp)
-; SSE_32_WIN-NEXT:    fxch %st(1)
-; SSE_32_WIN-NEXT:    fucompi %st(1), %st
-; SSE_32_WIN-NEXT:    fstp %st(0)
-; SSE_32_WIN-NEXT:    jbe LBB2_1
-; SSE_32_WIN-NEXT:  # %bb.2:
 ; SSE_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; SSE_32_WIN-NEXT:    addl $16, %esp
-; SSE_32_WIN-NEXT:    retl
-; SSE_32_WIN-NEXT:  LBB2_1:
-; SSE_32_WIN-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; SSE_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; SSE_32_WIN-NEXT:    addl $16, %esp
+; SSE_32_WIN-NEXT:    movl %ebp, %esp
+; SSE_32_WIN-NEXT:    popl %ebp
 ; SSE_32_WIN-NEXT:    retl
 ;
 ; SSE_32_LIN-LABEL: d_to_u32:
 ; SSE_32_LIN:       # %bb.0:
-; SSE_32_LIN-NEXT:    subl $16, %esp
+; SSE_32_LIN-NEXT:    subl $20, %esp
 ; SSE_32_LIN-NEXT:    fldl {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    flds {{\.LCPI.*}}
-; SSE_32_LIN-NEXT:    fld %st(1)
-; SSE_32_LIN-NEXT:    fsub %st(1), %st
 ; SSE_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; SSE_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; SSE_32_LIN-NEXT:    orl $3072, %eax # imm = 0xC00
 ; SSE_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; SSE_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fistpl {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; SSE_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fnstcw (%esp)
-; SSE_32_LIN-NEXT:    movzwl (%esp), %eax
-; SSE_32_LIN-NEXT:    orl $3072, %eax # imm = 0xC00
-; SSE_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fxch %st(1)
-; SSE_32_LIN-NEXT:    fistl {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fldcw (%esp)
-; SSE_32_LIN-NEXT:    fxch %st(1)
-; SSE_32_LIN-NEXT:    fucompi %st(1), %st
-; SSE_32_LIN-NEXT:    fstp %st(0)
-; SSE_32_LIN-NEXT:    jbe .LBB2_1
-; SSE_32_LIN-NEXT:  # %bb.2:
 ; SSE_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; SSE_32_LIN-NEXT:    addl $16, %esp
-; SSE_32_LIN-NEXT:    retl
-; SSE_32_LIN-NEXT:  .LBB2_1:
-; SSE_32_LIN-NEXT:    movl $-2147483648, %eax # imm = 0x80000000
-; SSE_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; SSE_32_LIN-NEXT:    addl $16, %esp
+; SSE_32_LIN-NEXT:    addl $20, %esp
 ; SSE_32_LIN-NEXT:    retl
 ;
 ; X87_WIN-LABEL: d_to_u32:
@@ -507,52 +469,36 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind {
 ;
 ; SSE2_32_WIN-LABEL: x_to_u32:
 ; SSE2_32_WIN:       # %bb.0:
-; SSE2_32_WIN-NEXT:    subl $8, %esp
-; SSE2_32_WIN-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE2_32_WIN-NEXT:    flds __real@4f000000
-; SSE2_32_WIN-NEXT:    fld %st(1)
-; SSE2_32_WIN-NEXT:    fsub %st(1), %st
-; SSE2_32_WIN-NEXT:    xorl %eax, %eax
-; SSE2_32_WIN-NEXT:    fxch %st(1)
-; SSE2_32_WIN-NEXT:    fucompi %st(2), %st
-; SSE2_32_WIN-NEXT:    fcmovnbe %st(1), %st
-; SSE2_32_WIN-NEXT:    fstp %st(1)
-; SSE2_32_WIN-NEXT:    setbe %al
-; SSE2_32_WIN-NEXT:    fnstcw (%esp)
-; SSE2_32_WIN-NEXT:    movzwl (%esp), %ecx
-; SSE2_32_WIN-NEXT:    orl $3072, %ecx # imm = 0xC00
-; SSE2_32_WIN-NEXT:    movw %cx, {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT:    pushl %ebp
+; SSE2_32_WIN-NEXT:    movl %esp, %ebp
+; SSE2_32_WIN-NEXT:    andl $-8, %esp
+; SSE2_32_WIN-NEXT:    subl $16, %esp
+; SSE2_32_WIN-NEXT:    fldt 8(%ebp)
+; SSE2_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; SSE2_32_WIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; SSE2_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
 ; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE2_32_WIN-NEXT:    fistpl {{[0-9]+}}(%esp)
-; SSE2_32_WIN-NEXT:    fldcw (%esp)
-; SSE2_32_WIN-NEXT:    shll $31, %eax
-; SSE2_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; SSE2_32_WIN-NEXT:    addl $8, %esp
+; SSE2_32_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE2_32_WIN-NEXT:    movl %ebp, %esp
+; SSE2_32_WIN-NEXT:    popl %ebp
 ; SSE2_32_WIN-NEXT:    retl
 ;
 ; SSE2_32_LIN-LABEL: x_to_u32:
 ; SSE2_32_LIN:       # %bb.0:
-; SSE2_32_LIN-NEXT:    subl $8, %esp
+; SSE2_32_LIN-NEXT:    subl $20, %esp
 ; SSE2_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE2_32_LIN-NEXT:    flds {{\.LCPI.*}}
-; SSE2_32_LIN-NEXT:    fld %st(1)
-; SSE2_32_LIN-NEXT:    fsub %st(1), %st
-; SSE2_32_LIN-NEXT:    xorl %eax, %eax
-; SSE2_32_LIN-NEXT:    fxch %st(1)
-; SSE2_32_LIN-NEXT:    fucompi %st(2), %st
-; SSE2_32_LIN-NEXT:    fcmovnbe %st(1), %st
-; SSE2_32_LIN-NEXT:    fstp %st(1)
-; SSE2_32_LIN-NEXT:    setbe %al
-; SSE2_32_LIN-NEXT:    fnstcw (%esp)
-; SSE2_32_LIN-NEXT:    movzwl (%esp), %ecx
-; SSE2_32_LIN-NEXT:    orl $3072, %ecx # imm = 0xC00
-; SSE2_32_LIN-NEXT:    movw %cx, {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; SSE2_32_LIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; SSE2_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE2_32_LIN-NEXT:    fistpl {{[0-9]+}}(%esp)
-; SSE2_32_LIN-NEXT:    fldcw (%esp)
-; SSE2_32_LIN-NEXT:    shll $31, %eax
-; SSE2_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; SSE2_32_LIN-NEXT:    addl $8, %esp
+; SSE2_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE2_32_LIN-NEXT:    addl $20, %esp
 ; SSE2_32_LIN-NEXT:    retl
 ;
 ; SSE2_64_WIN-LABEL: x_to_u32:
@@ -585,52 +531,36 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind {
 ;
 ; SSE_32_WIN-LABEL: x_to_u32:
 ; SSE_32_WIN:       # %bb.0:
-; SSE_32_WIN-NEXT:    subl $8, %esp
-; SSE_32_WIN-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    flds __real@4f000000
-; SSE_32_WIN-NEXT:    fld %st(1)
-; SSE_32_WIN-NEXT:    fsub %st(1), %st
-; SSE_32_WIN-NEXT:    xorl %eax, %eax
-; SSE_32_WIN-NEXT:    fxch %st(1)
-; SSE_32_WIN-NEXT:    fucompi %st(2), %st
-; SSE_32_WIN-NEXT:    fcmovnbe %st(1), %st
-; SSE_32_WIN-NEXT:    fstp %st(1)
-; SSE_32_WIN-NEXT:    setbe %al
-; SSE_32_WIN-NEXT:    fnstcw (%esp)
-; SSE_32_WIN-NEXT:    movzwl (%esp), %ecx
-; SSE_32_WIN-NEXT:    orl $3072, %ecx # imm = 0xC00
-; SSE_32_WIN-NEXT:    movw %cx, {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT:    pushl %ebp
+; SSE_32_WIN-NEXT:    movl %esp, %ebp
+; SSE_32_WIN-NEXT:    andl $-8, %esp
+; SSE_32_WIN-NEXT:    subl $16, %esp
+; SSE_32_WIN-NEXT:    fldt 8(%ebp)
+; SSE_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; SSE_32_WIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; SSE_32_WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; SSE_32_WIN-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; SSE_32_WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fistpl {{[0-9]+}}(%esp)
-; SSE_32_WIN-NEXT:    fldcw (%esp)
-; SSE_32_WIN-NEXT:    shll $31, %eax
-; SSE_32_WIN-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; SSE_32_WIN-NEXT:    addl $8, %esp
+; SSE_32_WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE_32_WIN-NEXT:    movl %ebp, %esp
+; SSE_32_WIN-NEXT:    popl %ebp
 ; SSE_32_WIN-NEXT:    retl
 ;
 ; SSE_32_LIN-LABEL: x_to_u32:
 ; SSE_32_LIN:       # %bb.0:
-; SSE_32_LIN-NEXT:    subl $8, %esp
+; SSE_32_LIN-NEXT:    subl $20, %esp
 ; SSE_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    flds {{\.LCPI.*}}
-; SSE_32_LIN-NEXT:    fld %st(1)
-; SSE_32_LIN-NEXT:    fsub %st(1), %st
-; SSE_32_LIN-NEXT:    xorl %eax, %eax
-; SSE_32_LIN-NEXT:    fxch %st(1)
-; SSE_32_LIN-NEXT:    fucompi %st(2), %st
-; SSE_32_LIN-NEXT:    fcmovnbe %st(1), %st
-; SSE_32_LIN-NEXT:    fstp %st(1)
-; SSE_32_LIN-NEXT:    setbe %al
-; SSE_32_LIN-NEXT:    fnstcw (%esp)
-; SSE_32_LIN-NEXT:    movzwl (%esp), %ecx
-; SSE_32_LIN-NEXT:    orl $3072, %ecx # imm = 0xC00
-; SSE_32_LIN-NEXT:    movw %cx, {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; SSE_32_LIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; SSE_32_LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; SSE_32_LIN-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; SSE_32_LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fistpl {{[0-9]+}}(%esp)
-; SSE_32_LIN-NEXT:    fldcw (%esp)
-; SSE_32_LIN-NEXT:    shll $31, %eax
-; SSE_32_LIN-NEXT:    xorl {{[0-9]+}}(%esp), %eax
-; SSE_32_LIN-NEXT:    addl $8, %esp
+; SSE_32_LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE_32_LIN-NEXT:    addl $20, %esp
 ; SSE_32_LIN-NEXT:    retl
 ;
 ; X87_WIN-LABEL: x_to_u32:
-- 
2.7.4