[AArch64] Fix bug in store of vector 0 DAGCombine.

author Geoff Berry <gberry@codeaurora.org>

Thu, 21 Sep 2017 21:10:06 +0000 (21:10 +0000)

committer Geoff Berry <gberry@codeaurora.org>

Thu, 21 Sep 2017 21:10:06 +0000 (21:10 +0000)
author Geoff Berry <gberry@codeaurora.org>
Thu, 21 Sep 2017 21:10:06 +0000 (21:10 +0000)
committer Geoff Berry <gberry@codeaurora.org>
Thu, 21 Sep 2017 21:10:06 +0000 (21:10 +0000)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

index cfee36d..ff9bf2a 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9448,11 +9448,20 @@ static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
        return SDValue();
    }
  
-  // Use WZR/XZR here to prevent DAGCombiner::MergeConsecutiveStores from
-  // undoing this transformation.
-  SDValue SplatVal = VT.getVectorElementType().getSizeInBits() == 32
-                         ? DAG.getRegister(AArch64::WZR, MVT::i32)
-                         : DAG.getRegister(AArch64::XZR, MVT::i64);
+  // Use a CopyFromReg WZR/XZR here to prevent
+  // DAGCombiner::MergeConsecutiveStores from undoing this transformation.
+  SDLoc DL(&St);
+  unsigned ZeroReg;
+  EVT ZeroVT;
+  if (VT.getVectorElementType().getSizeInBits() == 32) {
+    ZeroReg = AArch64::WZR;
+    ZeroVT = MVT::i32;
+  } else {
+    ZeroReg = AArch64::XZR;
+    ZeroVT = MVT::i64;
+  }
+  SDValue SplatVal =
+      DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
    return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
  }
  
diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll

index 384aaa8..8c872cc 100644 (file)
--- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll
@@ -12,9 +12,9 @@ entry:
  define void @t2() nounwind ssp {
  entry:
  ; CHECK-LABEL: t2:
+; CHECK: stp xzr, xzr, [sp, #16]
  ; CHECK: strh wzr, [sp, #32]
-; CHECK: stp xzr, xzr, [sp, #8]
-; CHECK: str xzr, [sp, #24]
+; CHECK: str xzr, [sp, #8]
    %buf = alloca [26 x i8], align 1
    %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
    call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
diff --git a/llvm/test/CodeGen/AArch64/fastcc.ll b/llvm/test/CodeGen/AArch64/fastcc.ll

index fcc8522..3ea6df5 100644 (file)
--- a/llvm/test/CodeGen/AArch64/fastcc.ll
+++ b/llvm/test/CodeGen/AArch64/fastcc.ll
@@ -21,9 +21,11 @@ define fastcc void @func_stack0() {
    call fastcc void @func_stack8([8 x i32] undef, i32 42)
  ; CHECK:  bl func_stack8
  ; CHECK-NOT: sub sp, sp,
+; CHECK-NOT: [sp, #{{[-0-9]+}}]!
+; CHECK-NOT: [sp], #{{[-0-9]+}}
  
  ; CHECK-TAIL: bl func_stack8
-; CHECK-TAIL: sub sp, sp, #16
+; CHECK-TAIL: stp xzr, xzr, [sp, #-16]!
  
  
    call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
@@ -72,10 +74,12 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) {
    call fastcc void @func_stack8([8 x i32] undef, i32 42)
  ; CHECK:  bl func_stack8
  ; CHECK-NOT: sub sp, sp,
+; CHECK-NOT: [sp, #{{[-0-9]+}}]!
+; CHECK-NOT: [sp], #{{[-0-9]+}}
  
  
  ; CHECK-TAIL: bl func_stack8
-; CHECK-TAIL: sub sp, sp, #16
+; CHECK-TAIL: stp xzr, xzr, [sp, #-16]!
  
  
    call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
@@ -116,9 +120,11 @@ define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {
    call fastcc void @func_stack8([8 x i32] undef, i32 42)
  ; CHECK:  bl func_stack8
  ; CHECK-NOT: sub sp, sp,
+; CHECK-NOT: [sp, #{{[-0-9]+}}]!
+; CHECK-NOT: [sp], #{{[-0-9]+}}
  
  ; CHECK-TAIL: bl func_stack8
-; CHECK-TAIL: sub sp, sp, #16
+; CHECK-TAIL: stp xzr, xzr, [sp, #-16]!
  
  
    call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9)
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt.ll b/llvm/test/CodeGen/AArch64/ldst-opt.ll

index 9307b6a..e416dcb 100644 (file)
--- a/llvm/test/CodeGen/AArch64/ldst-opt.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-opt.ll
@@ -1667,4 +1667,17 @@ entry:
    ret void
  }
  
-
+; Check for bug 34674 where invalid add of xzr was being generated.
+; CHECK-LABEL: bug34674:
+; CHECK: // %entry
+; CHECK-NEXT: mov [[ZREG:x[0-9]+]], xzr
+; CHECK-DAG: stp [[ZREG]], [[ZREG]], [x0]
+; CHECK-DAG: add x{{[0-9]+}}, [[ZREG]], #1
+define i64 @bug34674(<2 x i64>* %p) {
+entry:
+  store <2 x i64> zeroinitializer, <2 x i64>* %p
+  %p2 = bitcast <2 x i64>* %p to i64*
+  %ld = load i64, i64* %p2
+  %add = add i64 %ld, 1
+  ret i64 %add
+}
author	Geoff Berry <gberry@codeaurora.org>
	Thu, 21 Sep 2017 21:10:06 +0000 (21:10 +0000)
committer	Geoff Berry <gberry@codeaurora.org>
	Thu, 21 Sep 2017 21:10:06 +0000 (21:10 +0000)
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/AArch64/arm64-memset-inline.ll		patch \| blob \| history
llvm/test/CodeGen/AArch64/fastcc.ll		patch \| blob \| history
llvm/test/CodeGen/AArch64/ldst-opt.ll		patch \| blob \| history