[X86] Move x86_64 fp128 conversion to libcalls from type legalization to DAG legalization

author Craig Topper <craig.topper@intel.com>

Wed, 11 Sep 2019 21:30:09 +0000 (21:30 +0000)

committer Craig Topper <craig.topper@intel.com>

Wed, 11 Sep 2019 21:30:09 +0000 (21:30 +0000)
author Craig Topper <craig.topper@intel.com>
Wed, 11 Sep 2019 21:30:09 +0000 (21:30 +0000)
committer Craig Topper <craig.topper@intel.com>
Wed, 11 Sep 2019 21:30:09 +0000 (21:30 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 1e3127f..8cc4b5f 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -337,6 +337,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
    setOperationAction(ISD::FREM             , MVT::f32  , Expand);
    setOperationAction(ISD::FREM             , MVT::f64  , Expand);
    setOperationAction(ISD::FREM             , MVT::f80  , Expand);
+  setOperationAction(ISD::FREM             , MVT::f128 , Expand);
    setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);
  
    // Promote the i8 variants and force them on up to i32 which has a shorter
@@ -383,15 +384,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
    // There's never any support for operations beyond MVT::f32.
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
+  setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
+  setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
  
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f80, MVT::f16, Expand);
+  setTruncStoreAction(MVT::f128, MVT::f16, Expand);
  
    if (Subtarget.hasPOPCNT()) {
      setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
@@ -625,19 +630,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  
-  // Long double always uses X87, except f128 in SSE.
+  // f80 always uses X87.
    if (UseX87) {
-    if (Subtarget.is64Bit() && Subtarget.hasSSE1()) {
-      addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
-                                                     : &X86::VR128RegClass);
-      ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
-      setOperationAction(ISD::FABS , MVT::f128, Custom);
-      setOperationAction(ISD::FNEG , MVT::f128, Custom);
-      setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
-
-      addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
-    }
-
      addRegisterClass(MVT::f80, &X86::RFP80RegClass);
      setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
      setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
@@ -673,10 +667,60 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
      setOperationAction(ISD::LLRINT, MVT::f80, Expand);
    }
  
+  // f128 uses xmm registers, but most operations require libcalls.
+  if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
+    addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
+                                                   : &X86::VR128RegClass);
+
+    addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
+
+    setOperationAction(ISD::FADD, MVT::f128, Custom);
+    setOperationAction(ISD::FSUB, MVT::f128, Custom);
+    setOperationAction(ISD::FDIV, MVT::f128, Custom);
+    setOperationAction(ISD::FMUL, MVT::f128, Custom);
+    setOperationAction(ISD::FMA,  MVT::f128, Expand);
+
+    setOperationAction(ISD::FABS, MVT::f128, Custom);
+    setOperationAction(ISD::FNEG, MVT::f128, Custom);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
+
+    setOperationAction(ISD::FSIN,    MVT::f128, Expand);
+    setOperationAction(ISD::FCOS,    MVT::f128, Expand);
+    setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
+    setOperationAction(ISD::FSQRT,   MVT::f128, Expand);
+
+    setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+    // We need to custom handle any FP_ROUND with an f128 input, but
+    // LegalizeDAG uses the result type to know when to run a custom handler.
+    // So we have to list all legal floating point result types here.
+    if (isTypeLegal(MVT::f32)) {
+      setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
+    }
+    if (isTypeLegal(MVT::f64)) {
+      setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
+    }
+    if (isTypeLegal(MVT::f80)) {
+      setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
+      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
+    }
+
+    setOperationAction(ISD::SETCC, MVT::f128, Custom);
+
+    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
+    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
+    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
+    setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+    setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+    setTruncStoreAction(MVT::f128, MVT::f80, Expand);
+  }
+
    // Always use a library call for pow.
    setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
    setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
    setOperationAction(ISD::FPOW             , MVT::f80  , Expand);
+  setOperationAction(ISD::FPOW             , MVT::f128 , Expand);
  
    setOperationAction(ISD::FLOG, MVT::f80, Expand);
    setOperationAction(ISD::FLOG2, MVT::f80, Expand);
@@ -786,6 +830,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
  
      setOperationAction(ISD::LOAD,               MVT::v2f32, Custom);
      setOperationAction(ISD::STORE,              MVT::v2f32, Custom);
+
+    setOperationAction(ISD::STRICT_FP_ROUND,    MVT::v4f32, Custom);
    }
  
    if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -1139,6 +1185,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
  
      setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
  
+    setOperationAction(ISD::STRICT_FP_ROUND,    MVT::v8f32, Custom);
+
      if (!Subtarget.hasAVX512())
        setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
  
@@ -1400,6 +1448,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
      setOperationAction(ISD::SINT_TO_FP,         MVT::v16i32, Legal);
      setOperationAction(ISD::UINT_TO_FP,         MVT::v16i32, Legal);
  
+    setOperationAction(ISD::STRICT_FP_ROUND,    MVT::v16f32, Custom);
+
      setTruncStoreAction(MVT::v8i64,   MVT::v8i8,   Legal);
      setTruncStoreAction(MVT::v8i64,   MVT::v8i16,  Legal);
      setTruncStoreAction(MVT::v8i64,   MVT::v8i32,  Legal);
@@ -4661,6 +4711,10 @@ static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
          // X < 0   -> X == 0, jump on sign.
          return X86::COND_S;
        }
+      if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
+        // X >= 0   -> X == 0, jump on !sign.
+        return X86::COND_NS;
+      }
        if (SetCCOpcode == ISD::SETLT && RHSC->getAPIntValue() == 1) {
          // X < 1   -> X <= 0
          RHS = DAG.getConstant(0, DL, RHS.getValueType());
@@ -18275,6 +18329,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
    MVT VT = Op.getSimpleValueType();
    SDLoc dl(Op);
  
+  if (VT == MVT::f128)
+    return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
+
    if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
      return Extract;
  
@@ -18634,16 +18691,18 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
    SDValue N0 = Op.getOperand(0);
    SDLoc dl(Op);
    auto PtrVT = getPointerTy(DAG.getDataLayout());
+  MVT SrcVT = N0.getSimpleValueType();
+  MVT DstVT = Op.getSimpleValueType();
+
+  if (DstVT == MVT::f128)
+    return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
  
-  if (Op.getSimpleValueType().isVector())
+  if (DstVT.isVector())
      return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
  
    if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
      return Extract;
  
-  MVT SrcVT = N0.getSimpleValueType();
-  MVT DstVT = Op.getSimpleValueType();
-
    if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
        (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) {
      // Conversions from unsigned i32 to f32/f64 are legal,
@@ -19371,6 +19430,17 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
    MVT SrcVT = Src.getSimpleValueType();
    SDLoc dl(Op);
  
+  if (SrcVT == MVT::f128) {
+    RTLIB::Libcall LC;
+    if (Op.getOpcode() == ISD::FP_TO_SINT)
+      LC = RTLIB::getFPTOSINT(SrcVT, VT);
+    else
+      LC = RTLIB::getFPTOUINT(SrcVT, VT);
+
+    MakeLibCallOptions CallOptions;
+    return makeLibCall(DAG, LC, VT, Src, CallOptions, SDLoc(Op)).first;
+  }
+
    if (VT.isVector()) {
      if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
        MVT ResVT = MVT::v4i32;
@@ -19446,12 +19516,17 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
    llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
  }
  
-static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    SDValue In = Op.getOperand(0);
    MVT SVT = In.getSimpleValueType();
  
+  if (VT == MVT::f128) {
+    RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
+    return LowerF128Call(Op, DAG, LC);
+  }
+
    assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
  
    return DAG.getNode(X86ISD::VFPEXT, DL, VT,
@@ -19459,6 +19534,33 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
                                   In, DAG.getUNDEF(SVT)));
  }
  
+SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+  MVT VT = Op.getSimpleValueType(); // result type of the round
+  SDValue In = Op.getOperand(0);
+  MVT SVT = In.getSimpleValueType(); // type of the value being rounded
+
+  // Only an f128 source is handled here; any other FP_ROUND is returned as-is.
+  if (SVT != MVT::f128)
+    return Op;
+
+  RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, VT);
+
+  // The FP_ROUND node's second operand (whether the round is known precise)
+  // is not a libcall argument, so pass only In rather than going through
+  // LowerF128Call, which forwards every operand of the node.
+  MakeLibCallOptions CallOptions;
+  return makeLibCall(DAG, LC, VT, In, CallOptions, SDLoc(Op)).first;
+}
+
+// FIXME: This is a hack to allow FP_ROUND to be marked Custom without breaking
+// the default expansion of STRICT_FP_ROUND. The node is returned unchanged.
+static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) {
+  // FIXME: f128 needs a libcall with a chain. Operand 0 is the chain here.
+  assert(Op.getOperand(1).getValueType() != MVT::f128 &&
+         "Don't know how to handle f128 yet!");
+  return Op; // nothing to lower for non-f128 sources
+}
+
  /// Horizontal vector math instructions may be slower than normal math with
  /// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
  /// implementation, and likely shuffle complexity of the alternate sequence.
@@ -19543,8 +19645,13 @@ static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
  
  /// Depending on uarch and/or optimizing for size, we might prefer to use a
  /// vector operation in place of the typical scalar operation.
-static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG,
-                             const X86Subtarget &Subtarget) {
+SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const {
+  if (Op.getValueType() == MVT::f128) {
+    RTLIB::Libcall LC = Op.getOpcode() == ISD::FADD ? RTLIB::ADD_F128
+                                                    : RTLIB::SUB_F128;
+    return LowerF128Call(Op, DAG, LC);
+  }
+
    assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
           "Only expecting float/double");
    return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
@@ -20874,6 +20981,19 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
    SDLoc dl(Op);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  
+  // Handle f128 first, since one possible outcome is a normal integer
+  // comparison which gets handled by emitFlagsForSetcc.
+  if (Op0.getValueType() == MVT::f128) {
+    softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1);
+
+    // If softenSetCCOperands returned a scalar, use it.
+    if (!Op1.getNode()) {
+      assert(Op0.getValueType() == Op.getValueType() &&
+             "Unexpected setcc expansion!");
+      return Op0;
+    }
+  }
+
    SDValue X86CC;
    SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
    if (!EFLAGS)
@@ -27579,6 +27699,13 @@ SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op,
    return NOOP;
  }
  
+SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
+                                         RTLIB::Libcall Call) const {
+  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end()); // every operand
+  MakeLibCallOptions CallOptions; // defaults; result type is always f128
+  return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
+}
+
  /// Provide custom lowering hooks for some operations.
  SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    switch (Op.getOpcode()) {
@@ -27625,10 +27752,14 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:         return LowerFP_TO_INT(Op, DAG);
    case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
+  case ISD::FP_ROUND:           return LowerFP_ROUND(Op, DAG);
+  case ISD::STRICT_FP_ROUND:    return LowerSTRICT_FP_ROUND(Op, DAG);
    case ISD::LOAD:               return LowerLoad(Op, Subtarget, DAG);
    case ISD::STORE:              return LowerStore(Op, Subtarget, DAG);
    case ISD::FADD:
-  case ISD::FSUB:               return lowerFaddFsub(Op, DAG, Subtarget);
+  case ISD::FSUB:               return lowerFaddFsub(Op, DAG);
+  case ISD::FMUL:               return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
+  case ISD::FDIV:               return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
    case ISD::FABS:
    case ISD::FNEG:               return LowerFABSorFNEG(Op, DAG);
    case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h

index 82e3b98..6f2903a 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1342,6 +1342,12 @@ namespace llvm {
      SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+
+    SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
+                          RTLIB::Libcall Call) const;
  
      SDValue
      LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll

index 7dde098..3089add 100644 (file)
--- a/llvm/test/CodeGen/X86/fp128-cast.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast.ll
@@ -505,15 +505,11 @@ entry:
  define void @TestFPTruncF128_F80() nounwind {
  ; X64-SSE-LABEL: TestFPTruncF128_F80:
  ; X64-SSE:       # %bb.0: # %entry
-; X64-SSE-NEXT:    subq $24, %rsp
+; X64-SSE-NEXT:    pushq %rax
  ; X64-SSE-NEXT:    movaps {{.*}}(%rip), %xmm0
  ; X64-SSE-NEXT:    callq __trunctfxf2
-; X64-SSE-NEXT:    fstpt (%rsp)
-; X64-SSE-NEXT:    movq (%rsp), %rax
-; X64-SSE-NEXT:    movq %rax, {{.*}}(%rip)
-; X64-SSE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; X64-SSE-NEXT:    movw %ax, vf80+{{.*}}(%rip)
-; X64-SSE-NEXT:    addq $24, %rsp
+; X64-SSE-NEXT:    fstpt {{.*}}(%rip)
+; X64-SSE-NEXT:    popq %rax
  ; X64-SSE-NEXT:    retq
  ;
  ; X32-LABEL: TestFPTruncF128_F80:
@@ -531,15 +527,11 @@ define void @TestFPTruncF128_F80() nounwind {
  ;
  ; X64-AVX-LABEL: TestFPTruncF128_F80:
  ; X64-AVX:       # %bb.0: # %entry
-; X64-AVX-NEXT:    subq $24, %rsp
+; X64-AVX-NEXT:    pushq %rax
  ; X64-AVX-NEXT:    vmovaps {{.*}}(%rip), %xmm0
  ; X64-AVX-NEXT:    callq __trunctfxf2
-; X64-AVX-NEXT:    fstpt (%rsp)
-; X64-AVX-NEXT:    movq (%rsp), %rax
-; X64-AVX-NEXT:    movq %rax, {{.*}}(%rip)
-; X64-AVX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
-; X64-AVX-NEXT:    movw %ax, vf80+{{.*}}(%rip)
-; X64-AVX-NEXT:    addq $24, %rsp
+; X64-AVX-NEXT:    fstpt {{.*}}(%rip)
+; X64-AVX-NEXT:    popq %rax
  ; X64-AVX-NEXT:    retq
  entry:
    %0 = load fp128, fp128* @vf128, align 16
diff --git a/llvm/test/CodeGen/X86/fp128-compare.ll b/llvm/test/CodeGen/X86/fp128-compare.ll

index 6f2b0c5..f7dd5a4 100644 (file)
--- a/llvm/test/CodeGen/X86/fp128-compare.ll
+++ b/llvm/test/CodeGen/X86/fp128-compare.ll
@@ -48,7 +48,10 @@ define i32 @TestComp128LT(fp128 %d1, fp128 %d2) {
  ; CHECK-NEXT:    pushq %rax
  ; CHECK-NEXT:    .cfi_def_cfa_offset 16
  ; CHECK-NEXT:    callq __lttf2
-; CHECK-NEXT:    shrl $31, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    testl %eax, %eax
+; CHECK-NEXT:    sets %cl
+; CHECK-NEXT:    movl %ecx, %eax
  ; CHECK-NEXT:    popq %rcx
  ; CHECK-NEXT:    .cfi_def_cfa_offset 8
  ; CHECK-NEXT:    retq
@@ -56,9 +59,9 @@ entry:
    %cmp = fcmp olt fp128 %d1, %d2
    %conv = zext i1 %cmp to i32
    ret i32 %conv
-; The 'shrl' is a special optimization in llvm to combine
-; the effect of 'fcmp olt' and 'zext'. The main purpose is
-; to test soften call to __lttf2.
+; FIXME: This used to generate a shrl to move the sign bit of eax into bit 0.
+; This no longer happens with fp128 compares being expanded by LegalizeDAG.
+; We can add a new DAG combine for X86ISD::CMP/SETCC to restore this.
  }
  
  define i32 @TestComp128LE(fp128 %d1, fp128 %d2) {
diff --git a/llvm/test/CodeGen/X86/fp128-i128.ll b/llvm/test/CodeGen/X86/fp128-i128.ll

index f18b3e4..57776af 100644 (file)
--- a/llvm/test/CodeGen/X86/fp128-i128.ll
+++ b/llvm/test/CodeGen/X86/fp128-i128.ll
@@ -160,11 +160,14 @@ define fp128 @TestI128_1(fp128 %x) #0 {
  ; AVX-NEXT:    vmovaps (%rsp), %xmm0
  ; AVX-NEXT:    vmovaps {{.*}}(%rip), %xmm1
  ; AVX-NEXT:    callq __lttf2
-; AVX-NEXT:    xorl %ecx, %ecx
  ; AVX-NEXT:    testl %eax, %eax
-; AVX-NEXT:    sets %cl
-; AVX-NEXT:    shlq $4, %rcx
-; AVX-NEXT:    vmovaps {{\.LCPI.*}}(%rcx), %xmm0
+; AVX-NEXT:    js .LBB2_1
+; AVX-NEXT:  # %bb.2: # %entry
+; AVX-NEXT:    vmovaps {{.*}}(%rip), %xmm0
+; AVX-NEXT:    addq $40, %rsp
+; AVX-NEXT:    retq
+; AVX-NEXT:  .LBB2_1:
+; AVX-NEXT:    vmovaps {{.*}}(%rip), %xmm0
  ; AVX-NEXT:    addq $40, %rsp
  ; AVX-NEXT:    retq
  entry:
author	Craig Topper <craig.topper@intel.com>
	Wed, 11 Sep 2019 21:30:09 +0000 (21:30 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Wed, 11 Sep 2019 21:30:09 +0000 (21:30 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/lib/Target/X86/X86ISelLowering.h		patch | blob | history
llvm/test/CodeGen/X86/fp128-cast.ll		patch | blob | history
llvm/test/CodeGen/X86/fp128-compare.ll		patch | blob | history
llvm/test/CodeGen/X86/fp128-i128.ll		patch | blob | history