[X86] Use movlps for i64 atomic stores on 32-bit targets with sse1.

author Craig Topper <craig.topper@gmail.com>

Sun, 23 Feb 2020 02:15:27 +0000 (18:15 -0800)

committer Craig Topper <craig.topper@gmail.com>

Sun, 23 Feb 2020 02:22:47 +0000 (18:22 -0800)
author Craig Topper <craig.topper@gmail.com>
Sun, 23 Feb 2020 02:15:27 +0000 (18:15 -0800)
committer Craig Topper <craig.topper@gmail.com>
Sun, 23 Feb 2020 02:22:47 +0000 (18:22 -0800)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index a7e4bc7..6d4c067 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27514,7 +27514,6 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
    return false;
  }
  
-// TODO: In 32-bit mode, use MOVLPS when SSE1 is available?
  // TODO: In 32-bit mode, use FISTP when X87 is available?
  bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
    Type *MemType = SI->getValueOperand()->getType();
@@ -27522,7 +27521,7 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
    bool NoImplicitFloatOps =
        SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat);
    if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() &&
-      !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2())
+      !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE1())
      return false;
  
    return needsCmpXchgNb(MemType);
@@ -28281,16 +28280,18 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
      return Op;
  
    if (VT == MVT::i64 && !IsTypeLegal) {
-    // For illegal i64 atomic_stores, we can try to use MOVQ if SSE2 is enabled.
-    // FIXME: Use movlps with SSE1.
+    // For illegal i64 atomic_stores, we can try to use MOVQ or MOVLPS if SSE
+    // is enabled.
      // FIXME: Use fist with X87.
      bool NoImplicitFloatOps =
          DAG.getMachineFunction().getFunction().hasFnAttribute(
              Attribute::NoImplicitFloat);
      if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
-        Subtarget.hasSSE2()) {
+        Subtarget.hasSSE1()) {
        SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                       Node->getOperand(2));
+      MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
+      SclToVec = DAG.getBitcast(StVT, SclToVec);
        SDVTList Tys = DAG.getVTList(MVT::Other);
        SDValue Ops[] = { Node->getChain(), SclToVec, Node->getBasePtr() };
        SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys,
@@ -42370,6 +42371,28 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
    return SDValue();
  }
  
+static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     const X86Subtarget &Subtarget) {
+  auto *St = cast<MemIntrinsicSDNode>(N);
+
+  SDValue StoredVal = N->getOperand(1);
+  MVT VT = StoredVal.getSimpleValueType();
+  EVT MemVT = St->getMemoryVT();
+
+  // Demand only the low elements of StoredVal that fit in the stored MemVT.
+  unsigned StElts = MemVT.getSizeInBits() / VT.getScalarSizeInBits();
+  APInt DemandedElts = APInt::getLowBitsSet(VT.getVectorNumElements(), StElts);
+
+  APInt KnownUndef, KnownZero;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.SimplifyDemandedVectorElts(StoredVal, DemandedElts, KnownUndef,
+                                     KnownZero, DCI))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
  /// Return 'true' if this vector operation is "horizontal"
  /// and return the operands for the horizontal operation in LHS and RHS.  A
  /// horizontal operation performs the binary operation on successive elements
@@ -46751,6 +46774,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
    case ISD::MLOAD:          return combineMaskedLoad(N, DAG, DCI, Subtarget);
    case ISD::STORE:          return combineStore(N, DAG, DCI, Subtarget);
    case ISD::MSTORE:         return combineMaskedStore(N, DAG, DCI, Subtarget);
+  case X86ISD::VEXTRACT_STORE:
+    return combineVEXTRACT_STORE(N, DAG, DCI, Subtarget);
    case ISD::SINT_TO_FP:
    case ISD::STRICT_SINT_TO_FP:
      return combineSIntToFP(N, DAG, DCI, Subtarget);
diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll

index 2d5417b..24950b5 100644 (file)
--- a/llvm/test/CodeGen/X86/atomic-fp.ll
+++ b/llvm/test/CodeGen/X86/atomic-fp.ll
@@ -114,33 +114,23 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
  ; X86-SSE1:       # %bb.0:
  ; X86-SSE1-NEXT:    pushl %ebp
  ; X86-SSE1-NEXT:    movl %esp, %ebp
-; X86-SSE1-NEXT:    pushl %ebx
-; X86-SSE1-NEXT:    pushl %esi
  ; X86-SSE1-NEXT:    andl $-8, %esp
  ; X86-SSE1-NEXT:    subl $24, %esp
-; X86-SSE1-NEXT:    movl 8(%ebp), %esi
-; X86-SSE1-NEXT:    fildll (%esi)
+; X86-SSE1-NEXT:    movl 8(%ebp), %eax
+; X86-SSE1-NEXT:    fildll (%eax)
  ; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
  ; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE1-NEXT:    movl %edx, {{[0-9]+}}(%esp)
  ; X86-SSE1-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%esp)
  ; X86-SSE1-NEXT:    fldl {{[0-9]+}}(%esp)
  ; X86-SSE1-NEXT:    faddl 12(%ebp)
  ; X86-SSE1-NEXT:    fstpl (%esp)
-; X86-SSE1-NEXT:    movl (%esp), %ebx
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl (%esi), %eax
-; X86-SSE1-NEXT:    movl 4(%esi), %edx
-; X86-SSE1-NEXT:    .p2align 4, 0x90
-; X86-SSE1-NEXT:  .LBB1_1: # %atomicrmw.start
-; X86-SSE1-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-SSE1-NEXT:    lock cmpxchg8b (%esi)
-; X86-SSE1-NEXT:    jne .LBB1_1
-; X86-SSE1-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-SSE1-NEXT:    leal -8(%ebp), %esp
-; X86-SSE1-NEXT:    popl %esi
-; X86-SSE1-NEXT:    popl %ebx
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
+; X86-SSE1-NEXT:    movl %ebp, %esp
  ; X86-SSE1-NEXT:    popl %ebp
  ; X86-SSE1-NEXT:    retl
  ;
@@ -298,9 +288,8 @@ define void @fadd_64g() nounwind {
  ; X86-SSE1:       # %bb.0:
  ; X86-SSE1-NEXT:    pushl %ebp
  ; X86-SSE1-NEXT:    movl %esp, %ebp
-; X86-SSE1-NEXT:    pushl %ebx
  ; X86-SSE1-NEXT:    andl $-8, %esp
-; X86-SSE1-NEXT:    subl $32, %esp
+; X86-SSE1-NEXT:    subl $24, %esp
  ; X86-SSE1-NEXT:    fildll glob64
  ; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
  ; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -310,18 +299,11 @@ define void @fadd_64g() nounwind {
  ; X86-SSE1-NEXT:    fld1
  ; X86-SSE1-NEXT:    faddl {{[0-9]+}}(%esp)
  ; X86-SSE1-NEXT:    fstpl (%esp)
-; X86-SSE1-NEXT:    movl (%esp), %ebx
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl glob64+4, %edx
-; X86-SSE1-NEXT:    movl glob64, %eax
-; X86-SSE1-NEXT:    .p2align 4, 0x90
-; X86-SSE1-NEXT:  .LBB3_1: # %atomicrmw.start
-; X86-SSE1-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-SSE1-NEXT:    lock cmpxchg8b glob64
-; X86-SSE1-NEXT:    jne .LBB3_1
-; X86-SSE1-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-SSE1-NEXT:    leal -4(%ebp), %esp
-; X86-SSE1-NEXT:    popl %ebx
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE1-NEXT:    movlps %xmm0, glob64
+; X86-SSE1-NEXT:    movl %ebp, %esp
  ; X86-SSE1-NEXT:    popl %ebp
  ; X86-SSE1-NEXT:    retl
  ;
@@ -477,9 +459,8 @@ define void @fadd_64imm() nounwind {
  ; X86-SSE1:       # %bb.0:
  ; X86-SSE1-NEXT:    pushl %ebp
  ; X86-SSE1-NEXT:    movl %esp, %ebp
-; X86-SSE1-NEXT:    pushl %ebx
  ; X86-SSE1-NEXT:    andl $-8, %esp
-; X86-SSE1-NEXT:    subl $32, %esp
+; X86-SSE1-NEXT:    subl $24, %esp
  ; X86-SSE1-NEXT:    fildll -559038737
  ; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
  ; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -489,18 +470,11 @@ define void @fadd_64imm() nounwind {
  ; X86-SSE1-NEXT:    fld1
  ; X86-SSE1-NEXT:    faddl {{[0-9]+}}(%esp)
  ; X86-SSE1-NEXT:    fstpl (%esp)
-; X86-SSE1-NEXT:    movl (%esp), %ebx
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl -559038737, %eax
-; X86-SSE1-NEXT:    movl -559038733, %edx
-; X86-SSE1-NEXT:    .p2align 4, 0x90
-; X86-SSE1-NEXT:  .LBB5_1: # %atomicrmw.start
-; X86-SSE1-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-SSE1-NEXT:    lock cmpxchg8b -559038737
-; X86-SSE1-NEXT:    jne .LBB5_1
-; X86-SSE1-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-SSE1-NEXT:    leal -4(%ebp), %esp
-; X86-SSE1-NEXT:    popl %ebx
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE1-NEXT:    movlps %xmm0, -559038737
+; X86-SSE1-NEXT:    movl %ebp, %esp
  ; X86-SSE1-NEXT:    popl %ebp
  ; X86-SSE1-NEXT:    retl
  ;
@@ -662,10 +636,9 @@ define void @fadd_64stack() nounwind {
  ; X86-SSE1:       # %bb.0:
  ; X86-SSE1-NEXT:    pushl %ebp
  ; X86-SSE1-NEXT:    movl %esp, %ebp
-; X86-SSE1-NEXT:    pushl %ebx
  ; X86-SSE1-NEXT:    andl $-8, %esp
-; X86-SSE1-NEXT:    subl $40, %esp
-; X86-SSE1-NEXT:    fildll (%esp)
+; X86-SSE1-NEXT:    subl $32, %esp
+; X86-SSE1-NEXT:    fildll {{[0-9]+}}(%esp)
  ; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
  ; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
  ; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -673,19 +646,12 @@ define void @fadd_64stack() nounwind {
  ; X86-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%esp)
  ; X86-SSE1-NEXT:    fld1
  ; X86-SSE1-NEXT:    faddl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl (%esp), %eax
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-SSE1-NEXT:    .p2align 4, 0x90
-; X86-SSE1-NEXT:  .LBB7_1: # %atomicrmw.start
-; X86-SSE1-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-SSE1-NEXT:    lock cmpxchg8b (%esp)
-; X86-SSE1-NEXT:    jne .LBB7_1
-; X86-SSE1-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-SSE1-NEXT:    leal -4(%ebp), %esp
-; X86-SSE1-NEXT:    popl %ebx
+; X86-SSE1-NEXT:    fstpl (%esp)
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE1-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movl %ebp, %esp
  ; X86-SSE1-NEXT:    popl %ebp
  ; X86-SSE1-NEXT:    retl
  ;
@@ -784,36 +750,26 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
  ; X86-SSE1:       # %bb.0: # %bb
  ; X86-SSE1-NEXT:    pushl %ebp
  ; X86-SSE1-NEXT:    movl %esp, %ebp
-; X86-SSE1-NEXT:    pushl %ebx
-; X86-SSE1-NEXT:    pushl %edi
  ; X86-SSE1-NEXT:    pushl %esi
  ; X86-SSE1-NEXT:    andl $-8, %esp
  ; X86-SSE1-NEXT:    subl $32, %esp
-; X86-SSE1-NEXT:    movl 20(%ebp), %esi
-; X86-SSE1-NEXT:    movl 8(%ebp), %edi
-; X86-SSE1-NEXT:    fildll (%edi,%esi,8)
+; X86-SSE1-NEXT:    movl 20(%ebp), %eax
+; X86-SSE1-NEXT:    movl 8(%ebp), %ecx
+; X86-SSE1-NEXT:    fildll (%ecx,%eax,8)
  ; X86-SSE1-NEXT:    fistpll {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-SSE1-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-SSE1-NEXT:    movl %edx, {{[0-9]+}}(%esp)
  ; X86-SSE1-NEXT:    fldl {{[0-9]+}}(%esp)
  ; X86-SSE1-NEXT:    faddl 12(%ebp)
  ; X86-SSE1-NEXT:    fstpl (%esp)
-; X86-SSE1-NEXT:    movl (%esp), %ebx
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl (%edi,%esi,8), %eax
-; X86-SSE1-NEXT:    movl 4(%edi,%esi,8), %edx
-; X86-SSE1-NEXT:    .p2align 4, 0x90
-; X86-SSE1-NEXT:  .LBB8_1: # %atomicrmw.start
-; X86-SSE1-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-SSE1-NEXT:    lock cmpxchg8b (%edi,%esi,8)
-; X86-SSE1-NEXT:    jne .LBB8_1
-; X86-SSE1-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-SSE1-NEXT:    leal -12(%ebp), %esp
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE1-NEXT:    movlps %xmm0, (%ecx,%eax,8)
+; X86-SSE1-NEXT:    leal -4(%ebp), %esp
  ; X86-SSE1-NEXT:    popl %esi
-; X86-SSE1-NEXT:    popl %edi
-; X86-SSE1-NEXT:    popl %ebx
  ; X86-SSE1-NEXT:    popl %ebp
  ; X86-SSE1-NEXT:    retl
  ;
diff --git a/llvm/test/CodeGen/X86/atomic-non-integer.ll b/llvm/test/CodeGen/X86/atomic-non-integer.ll

index d3aae06..ef31b27 100644 (file)
--- a/llvm/test/CodeGen/X86/atomic-non-integer.ll
+++ b/llvm/test/CodeGen/X86/atomic-non-integer.ll
@@ -55,27 +55,11 @@ define void @store_float(float* %fptr, float %v) {
  define void @store_double(double* %fptr, double %v) {
  ; X86-SSE1-LABEL: store_double:
  ; X86-SSE1:       # %bb.0:
-; X86-SSE1-NEXT:    pushl %ebx
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE1-NEXT:    pushl %esi
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 12
-; X86-SSE1-NEXT:    .cfi_offset %esi, -12
-; X86-SSE1-NEXT:    .cfi_offset %ebx, -8
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl (%esi), %eax
-; X86-SSE1-NEXT:    movl 4(%esi), %edx
-; X86-SSE1-NEXT:    .p2align 4, 0x90
-; X86-SSE1-NEXT:  .LBB2_1: # %atomicrmw.start
-; X86-SSE1-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-SSE1-NEXT:    lock cmpxchg8b (%esi)
-; X86-SSE1-NEXT:    jne .LBB2_1
-; X86-SSE1-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-SSE1-NEXT:    popl %esi
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE1-NEXT:    popl %ebx
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
  ; X86-SSE1-NEXT:    retl
  ;
  ; X86-SSE2-LABEL: store_double:
@@ -568,27 +552,12 @@ define void @store_float_seq_cst(float* %fptr, float %v) {
  define void @store_double_seq_cst(double* %fptr, double %v) {
  ; X86-SSE1-LABEL: store_double_seq_cst:
  ; X86-SSE1:       # %bb.0:
-; X86-SSE1-NEXT:    pushl %ebx
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE1-NEXT:    pushl %esi
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 12
-; X86-SSE1-NEXT:    .cfi_offset %esi, -12
-; X86-SSE1-NEXT:    .cfi_offset %ebx, -8
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE1-NEXT:    movl (%esi), %eax
-; X86-SSE1-NEXT:    movl 4(%esi), %edx
-; X86-SSE1-NEXT:    .p2align 4, 0x90
-; X86-SSE1-NEXT:  .LBB9_1: # %atomicrmw.start
-; X86-SSE1-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-SSE1-NEXT:    lock cmpxchg8b (%esi)
-; X86-SSE1-NEXT:    jne .LBB9_1
-; X86-SSE1-NEXT:  # %bb.2: # %atomicrmw.end
-; X86-SSE1-NEXT:    popl %esi
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE1-NEXT:    popl %ebx
-; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
+; X86-SSE1-NEXT:    lock orl $0, (%esp)
  ; X86-SSE1-NEXT:    retl
  ;
  ; X86-SSE2-LABEL: store_double_seq_cst:
author	Craig Topper <craig.topper@gmail.com>
	Sun, 23 Feb 2020 02:15:27 +0000 (18:15 -0800)
committer	Craig Topper <craig.topper@gmail.com>
	Sun, 23 Feb 2020 02:22:47 +0000 (18:22 -0800)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/atomic-fp.ll		patch \| blob \| history
llvm/test/CodeGen/X86/atomic-non-integer.ll		patch \| blob \| history