[x86] fix 256-bit vector store splitting to honor 'volatile'

author Sanjay Patel <spatel@rotateright.com>

Tue, 28 May 2019 12:58:07 +0000 (12:58 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Tue, 28 May 2019 12:58:07 +0000 (12:58 +0000)
author Sanjay Patel <spatel@rotateright.com>
Tue, 28 May 2019 12:58:07 +0000 (12:58 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Tue, 28 May 2019 12:58:07 +0000 (12:58 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 0bc31d5..7b4ce08 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21022,6 +21022,35 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
  }
  
+/// Change a 256-bit vector store into a pair of 128-bit vector stores.
+static SDValue split256BitStore(StoreSDNode *Store, SelectionDAG &DAG) {
+  SDValue StoredVal = Store->getValue();
+  assert(StoredVal.getValueType().is256BitVector() && "Expecting 256-bit op");
+
+  // Splitting volatile memory ops is not allowed unless the operation was not
+  // legal to begin with. We are assuming the input op is legal (this transform
+  // is only used for targets with AVX).
+  if (Store->isVolatile())
+    return SDValue(); // Empty SDValue: no split is produced for volatile stores.
+
+  MVT StoreVT = StoredVal.getSimpleValueType();
+  unsigned NumElems = StoreVT.getVectorNumElements();
+  SDLoc DL(Store);
+  SDValue Value0 = extract128BitVector(StoredVal, 0, DAG, DL); // Half starting at element 0.
+  SDValue Value1 = extract128BitVector(StoredVal, NumElems / 2, DAG, DL); // Half starting at element NumElems / 2.
+  SDValue Ptr0 = Store->getBasePtr();
+  SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 16, DL); // Second half is addressed 16 bytes past the base pointer.
+  unsigned Alignment = Store->getAlignment();
+  SDValue Ch0 =
+      DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),
+                   Alignment, Store->getMemOperand()->getFlags());
+  SDValue Ch1 =
+      DAG.getStore(Store->getChain(), DL, Value1, Ptr1, // Same input chain as Ch0: both halves hang off the original store's chain.
+                   Store->getPointerInfo().getWithOffset(16),
+                   MinAlign(Alignment, 16), Store->getMemOperand()->getFlags()); // Alignment derived from the original alignment and the 16-byte offset.
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Ch0, Ch1); // Join the two store chains into a single TokenFactor result.
+}
+
  static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
    StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
@@ -39345,20 +39374,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
      if (NumElems < 2)
        return SDValue();
  
-    SDValue Value0 = extract128BitVector(StoredVal, 0, DAG, dl);
-    SDValue Value1 = extract128BitVector(StoredVal, NumElems / 2, DAG, dl);
-
-    SDValue Ptr0 = St->getBasePtr();
-    SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 16, dl);
-
-    SDValue Ch0 =
-        DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(),
-                     Alignment, St->getMemOperand()->getFlags());
-    SDValue Ch1 =
-        DAG.getStore(St->getChain(), dl, Value1, Ptr1,
-                     St->getPointerInfo().getWithOffset(16),
-                     MinAlign(Alignment, 16U), St->getMemOperand()->getFlags());
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
+    return split256BitStore(St, DAG);
    }
  
    // Optimize trunc store (of multiple scalars) to shuffle and store.
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll

index 1fd4e07..7bd39f4 100644 (file)
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -187,8 +187,10 @@ define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp
  define void @double_save_volatile(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind {
  ; CHECK-LABEL: double_save_volatile:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovaps %xmm1, 16(%rdi)
-; CHECK-NEXT:    vmovaps %xmm0, (%rdi)
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT:    vmovups %ymm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
  ; CHECK-NEXT:    retq
  ;
  ; CHECK_O0-LABEL: double_save_volatile:
author	Sanjay Patel <spatel@rotateright.com>
	Tue, 28 May 2019 12:58:07 +0000 (12:58 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Tue, 28 May 2019 12:58:07 +0000 (12:58 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/avx-load-store.ll		patch \| blob \| history