}
}
- // If this is a store followed by a store with the same value to the same
- // location, then the store is dead/noop.
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
- if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
- ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
- ST1->isUnindexed() && !ST1->isVolatile()) {
- // The store is dead, remove it.
- return Chain;
+ if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
+ !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
+ ST->getMemoryVT() == ST1->getMemoryVT()) {
+ // If this is a store followed by a store with the same value to the same
+ // location, then the store is dead/noop.
+ if (ST1->getValue() == Value) {
+ // The store is dead, remove it.
+ return Chain;
+ }
+
+ // If this store's immediately preceding store is to the same location and
+ // no other node is chained to that preceding store, we can effectively
+ // drop the preceding store. Do not remove stores to undef as they may be
+ // used as data sinks.
+ if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+ !ST1->getBasePtr().isUndef()) {
+ // ST1 is fully overwritten and can be elided. Combine with its chain
+ // value.
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
}
}
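As an illustrative sketch (not part of the patch; function name hypothetical), the new combine targets IR like the following, where the first store is fully overwritten by a later store to the same location and, having no other chain users, can be dropped (assuming optimization is enabled, since the elision is guarded by OptLevel):

define void @overwritten_store(i32* %p) {
entry:
  store i32 0, i32* %p, align 4   ; dead: fully overwritten before it can be observed
  store i32 7, i32* %p, align 4   ; surviving store
  ret void
}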
; Original test case which exhibited the bug
define void @test1(%struct.tree_common* %t, i32 %code, i8* %type) {
; CHECK-LABEL: test1:
-; CHECK: stp xzr, xzr, [x0, #8]
-; CHECK: stp xzr, x2, [x0]
-; CHECK: str w1, [x0, #16]
+; CHECK-DAG: stp x2, xzr, [x0, #8]
+; CHECK-DAG: str w1, [x0, #16]
+; CHECK-DAG: str xzr, [x0]
entry:
%0 = bitcast %struct.tree_common* %t to i8*
tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false)
; Store to each struct element instead of using memset
define void @test2(%struct.tree_common* %t, i32 %code, i8* %type) {
; CHECK-LABEL: test2:
-; CHECK: stp xzr, xzr, [x0]
-; CHECK: str wzr, [x0, #16]
-; CHECK: str w1, [x0, #16]
-; CHECK: str x2, [x0, #8]
+; CHECK-DAG: str w1, [x0, #16]
+; CHECK-DAG: stp xzr, x2, [x0]
entry:
%0 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 0
%1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1
; Vector store instead of memset
define void @test3(%struct.tree_common* %t, i32 %code, i8* %type) {
; CHECK-LABEL: test3:
-; CHECK: stp xzr, xzr, [x0, #8]
-; CHECK: stp xzr, x2, [x0]
-; CHECK: str w1, [x0, #16]
+; CHECK-DAG: stp x2, xzr, [x0, #8]
+; CHECK-DAG: str w1, [x0, #16]
+; CHECK-DAG: str xzr, [x0]
entry:
%0 = bitcast %struct.tree_common* %t to <3 x i64>*
store <3 x i64> zeroinitializer, <3 x i64>* %0, align 8
; Vector store, then store to vector elements
define void @test4(<3 x i64>* %p, i64 %x, i64 %y) {
; CHECK-LABEL: test4:
-; CHECK: stp xzr, xzr, [x0, #8]
-; CHECK: stp xzr, x2, [x0]
-; CHECK: str x1, [x0, #16]
+; CHECK-DAG: stp x2, x1, [x0, #8]
+; CHECK-DAG: str xzr, [x0]
entry:
store <3 x i64> zeroinitializer, <3 x i64>* %p, align 8
%0 = bitcast <3 x i64>* %p to i64*
; REQUIRES: asserts
-; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -o - | FileCheck %s
; Tests to check that the scheduler dependencies derived from alias analysis are
; correct when we have loads that have been split up so that they can later be
; merged into STP.
-; CHECK: ********** MI Scheduling **********
-; CHECK: test_splat:BB#0 entry
-; CHECK: SU({{[0-9]+}}): STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%3+8]
-; CHECK: Successors:
-; CHECK-NEXT: ord [[SU1:SU\([0-9]+\)]]
-; CHECK: SU({{[0-9]+}}): STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%3+4]
-; CHECK: Successors:
-; CHECK-NEXT: ord [[SU2:SU\([0-9]+\)]]
-; CHECK: [[SU1]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%2]
-; CHECK: [[SU2]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%1]
+; Now that overwritten stores are elided in SelectionDAG, the dependencies
+; are resolved and removed before MISCHED runs. Check that we get an
+; equivalent pair of stp instructions as a baseline.
+
+; CHECK-LABEL: test_splat
+; CHECK: ldr [[REG:w[0-9]+]], [x2]
+; CHECK-DAG: stp w0, [[REG]], [x2, #12]
+; CHECK-DAG: stp [[REG]], w1, [x2, #4]
define void @test_splat(i32 %x, i32 %y, i32* %p) {
entry:
%val = load i32, i32* %p, align 4
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
%struct.tree_common = type { i8*, i8*, i32 }
-; CHECK: ********** MI Scheduling **********
-; CHECK: test_zero:BB#0 entry
-; CHECK: SU({{[0-9]+}}): STRXui %XZR, %vreg{{[0-9]+}}, 2; mem:ST8[%0+16]
-; CHECK: Successors:
-; CHECK-NEXT: ord [[SU3:SU\([0-9]+\)]]
-; CHECK: SU({{[0-9]+}}): STRXui %XZR, %vreg{{[0-9]+}}, 1; mem:ST8[%0+8]
-; CHECK: Successors:
-; CHECK-NEXT: ord [[SU4:SU\([0-9]+\)]]
-; CHECK: [[SU3]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 4; mem:ST4[%code1]
-; CHECK: [[SU4]]: STRXui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 1; mem:ST8[%type2]
+; CHECK-LABEL: test_zero
+; CHECK-DAG: stp x2, xzr, [x0, #8]
+; CHECK-DAG: str w1, [x0, #16]
+; CHECK-DAG: str xzr, [x0]
+
define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) {
entry:
%0 = bitcast %struct.tree_common* %t to i8*
define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) {
%ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @private1, i32 0, i32 %index
%val = load float, float addrspace(2)* %ptr
- store float %val, float addrspace(1)* %out
+ store volatile float %val, float addrspace(1)* %out
%ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @private2, i32 0, i32 %index
%val2 = load float, float addrspace(2)* %ptr2
- store float %val2, float addrspace(1)* %out
+ store volatile float %val2, float addrspace(1)* %out
ret void
}
bb8: ; preds = %bb3
%1 = getelementptr inbounds i8, i8* %0, i32 0
- store i8 0, i8* %1, align 1
+ store volatile i8 0, i8* %1, align 1
%2 = call i32 @ptou() nounwind
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
%7 = or i8 %6, 48
%8 = add i8 %6, 87
%iftmp.5.0.1 = select i1 %5, i8 %7, i8 %8
- store i8 %iftmp.5.0.1, i8* %p8, align 1
+ store volatile i8 %iftmp.5.0.1, i8* %p8, align 1
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
%13 = or i8 %12, 48
%14 = add i8 %12, 87
%iftmp.5.0.2 = select i1 %11, i8 %13, i8 %14
- store i8 %iftmp.5.0.2, i8* %p8, align 1
+ store volatile i8 %iftmp.5.0.2, i8* %p8, align 1
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
%19 = or i8 %18, 48
%20 = add i8 %18, 87
%iftmp.5.0.4 = select i1 %17, i8 %19, i8 %20
- store i8 %iftmp.5.0.4, i8* null, align 1
+ store volatile i8 %iftmp.5.0.4, i8* null, align 1
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
%22 = urem i32 %21, 10
%23 = icmp ult i32 %22, 10
%iftmp.5.0.5 = select i1 %23, i8 0, i8 %val8
- store i8 %iftmp.5.0.5, i8* %p8, align 1
+ store volatile i8 %iftmp.5.0.5, i8* %p8, align 1
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
%28 = or i8 %27, 48
%29 = add i8 %27, 87
%iftmp.5.0.6 = select i1 %26, i8 %28, i8 %29
- store i8 %iftmp.5.0.6, i8* %p8, align 1
+ store volatile i8 %iftmp.5.0.6, i8* %p8, align 1
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
%34 = or i8 %33, 48
%35 = add i8 %33, 87
%iftmp.5.0.7 = select i1 %32, i8 %34, i8 %35
- store i8 %iftmp.5.0.7, i8* %p8, align 1
+ store volatile i8 %iftmp.5.0.7, i8* %p8, align 1
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}}
%40 = or i8 %39, 48
%41 = add i8 %39, 87
%iftmp.5.0.8 = select i1 %38, i8 %40, i8 %41
- store i8 %iftmp.5.0.8, i8* null, align 1
+ store volatile i8 %iftmp.5.0.8, i8* null, align 1
br label %bb46
bb46: ; preds = %bb3
; CHECK: sub sp, sp, #12
; CHECK: sub sp, sp, #4
; CHECK: add r0, sp, #4
-; CHECK: stm sp, {r0, r1, r2, r3}
+; CHECK: stmib sp, {r1, r2, r3}
%g = alloca i8*
%g1 = bitcast i8** %g to i8*
call void @llvm.va_start(i8* %g1)
; CHECK-LABEL: {{^}}main
; CHECK: mov [[TMP:r[0-9]+]], #0
; CHECK-NEXT: str [[TMP]], [sp, #4]
-; CHECK-NEXT: str [[TMP]], [sp]
+; CHECK_O0: str [[TMP]], [sp]
; CHECK_O0: ldr [[TMP:r[0-9]+]], [sp]
; CHECK_O0-NEXT: add [[TMP]], [[TMP]], #2
; CHECK_O1-NOT: ldr [[TMP:r[0-9]+]], [sp]
entry:
; CHECK-LABEL: va_arg:
%vl.addr = alloca i8*, align 2
-; CHECK: mov.w r12, 0(r1)
store i8* %vl, i8** %vl.addr, align 2
; CHECK: mov.w r12, [[REG:r[0-9]+]]
; CHECK-NEXT: add.w #2, [[REG]]
%0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG1
%1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 240)
- store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
+ store volatile <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
%3 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 15)
- store <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
+ store volatile <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
%4 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 170)
store <16 x i8> %4, <16 x i8>* @llvm_mips_bmnzi_b_RES
ret void
%0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG1
%1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 240)
- store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
+ store volatile <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
%3 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 15)
- store <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
+ store volatile <16 x i8> %3, <16 x i8>* @llvm_mips_bmnzi_b_RES
%4 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 170)
store <16 x i8> %4, <16 x i8>* @llvm_mips_bmnzi_b_RES
ret void
%0 = load ppc_fp128, ppc_fp128* @ld, align 16
%1 = load ppc_fp128, ppc_fp128* @ld2, align 16
%add = fadd ppc_fp128 %0, %1
- store ppc_fp128 %add, ppc_fp128* %c, align 16
+ store volatile ppc_fp128 %add, ppc_fp128* %c, align 16
%2 = load ppc_fp128, ppc_fp128* @ld, align 16
%3 = load ppc_fp128, ppc_fp128* @ld2, align 16
%sub = fsub ppc_fp128 %2, %3
- store ppc_fp128 %sub, ppc_fp128* %c, align 16
+ store volatile ppc_fp128 %sub, ppc_fp128* %c, align 16
%4 = load ppc_fp128, ppc_fp128* @ld, align 16
%5 = load ppc_fp128, ppc_fp128* @ld2, align 16
%mul = fmul ppc_fp128 %4, %5
- store ppc_fp128 %mul, ppc_fp128* %c, align 16
+ store volatile ppc_fp128 %mul, ppc_fp128* %c, align 16
%6 = load ppc_fp128, ppc_fp128* @ld, align 16
%7 = load ppc_fp128, ppc_fp128* @ld2, align 16
%div = fdiv ppc_fp128 %6, %7
- store ppc_fp128 %div, ppc_fp128* %c, align 16
+ store volatile ppc_fp128 %div, ppc_fp128* %c, align 16
ret void
; CHECK-LABEL: __gcc_qadd
i32 %a5, ; %i5
i32 signext %a6, ; [%fp+92]
i8* %a7) { ; [%fp+96]
- store i8 %a0, i8* %a4
- store i8 %a1, i8* %a4
+ store volatile i8 %a0, i8* %a4
+ store volatile i8 %a1, i8* %a4
%p16 = bitcast i8* %a4 to i16*
- store i16 %a2, i16* %p16
+ store volatile i16 %a2, i16* %p16
%p32 = bitcast i8* %a4 to i32*
- store i32 %a3, i32* %p32
+ store volatile i32 %a3, i32* %p32
%pp = bitcast i8* %a4 to i8**
- store i8* %a4, i8** %pp
- store i32 %a5, i32* %p32
- store i32 %a6, i32* %p32
- store i8* %a7, i8** %pp
+ store volatile i8* %a4, i8** %pp
+ store volatile i32 %a5, i32* %p32
+ store volatile i32 %a6, i32* %p32
+ store volatile i8* %a7, i8** %pp
ret void
}
i32 %a5, ; %i5
i32 signext %a6, ; [%fp+BIAS+176]
i8* %a7) { ; [%fp+BIAS+184]
- store i8 %a0, i8* %a4
- store i8 %a1, i8* %a4
+ store volatile i8 %a0, i8* %a4
+ store volatile i8 %a1, i8* %a4
%p16 = bitcast i8* %a4 to i16*
- store i16 %a2, i16* %p16
+ store volatile i16 %a2, i16* %p16
%p32 = bitcast i8* %a4 to i32*
- store i32 %a3, i32* %p32
+ store volatile i32 %a3, i32* %p32
%pp = bitcast i8* %a4 to i8**
- store i8* %a4, i8** %pp
- store i32 %a5, i32* %p32
- store i32 %a6, i32* %p32
- store i8* %a7, i8** %pp
+ store volatile i8* %a4, i8** %pp
+ store volatile i32 %a5, i32* %p32
+ store volatile i32 %a6, i32* %p32
+ store volatile i8* %a7, i8** %pp
ret void
}
%rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef,
i64* undef, i64* undef)
%e0 = extractvalue { i64, i64 } %rv, 0
- store i64 %e0, i64* %i0
+ store volatile i64 %e0, i64* %i0
%e1 = extractvalue { i64, i64 } %rv, 1
store i64 %e1, i64* %i0
ret void
%v6 = extractvalue { i1, i1, i1, i1 } %call, 2
%v7 = extractvalue { i1, i1, i1, i1 } %call, 3
%val = zext i1 %v3 to i32
- store i32 %val, i32* @var
+ store volatile i32 %val, i32* @var
%val2 = zext i1 %v5 to i32
- store i32 %val2, i32* @var
+ store volatile i32 %val2, i32* @var
%val3 = zext i1 %v6 to i32
- store i32 %val3, i32* @var
+ store volatile i32 %val3, i32* @var
%val4 = zext i1 %v7 to i32
store i32 %val4, i32* @var
ret void
%z = alloca i8, align 1
; CHECK: add r1, sp, #8
; CHECK: str r1, [r0]
- store i8* %x, i8** %p, align 4
+ store volatile i8* %x, i8** %p, align 4
; CHECK: add r1, sp, #4
; CHECK: str r1, [r0]
- store i8* %y, i8** %p, align 4
+ store volatile i8* %y, i8** %p, align 4
; CHECK: mov r1, sp
; CHECK: str r1, [r0]
- store i8* %z, i8** %p, align 4
+ store volatile i8* %z, i8** %p, align 4
ret void
}
; CHECK: add r1, sp, #1020
; CHECK: adds r1, #4
; CHECK: str r1, [r0]
- store [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
+ store volatile [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
; CHECK: mov r1, sp
; CHECK: str r1, [r0]
- store [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
+ store volatile [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
ret void
}
; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
- store %union.rec* null, %union.rec** @zz_hold, align 4
+ store volatile %union.rec* null, %union.rec** @zz_hold, align 4
store %union.rec* null, %union.rec** @zz_res, align 4
- store %union.rec* %x, %union.rec** @zz_hold, align 4
+ store volatile %union.rec* %x, %union.rec** @zz_hold, align 4
%0 = call %union.rec* @Manifest(%union.rec* undef, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind ; <%union.rec*> [#uses=0]
unreachable
; CHECK: calll _addrof_i32
; CHECK: retl
-
; Don't elide the copy when the alloca is escaped with a store.
-
define void @escape_with_store(i32 %x) {
%x1 = alloca i32
%x2 = alloca i32*
}
; CHECK-LABEL: _escape_with_store:
-; CHECK-DAG: movl {{.*}}(%esp), %[[reg:[^ ]*]]
-; CHECK-DAG: movl $0, [[offs:[0-9]*]](%esp)
-; CHECK: movl %[[reg]], [[offs]](%esp)
+; CHECK: movl {{.*}}(%esp), %[[reg:[^ ]*]]
+; CHECK: movl %[[reg]], [[offs:[0-9]*]](%esp)
; CHECK: calll _addrof_i32
; X32-SSE: # BB#0:
; X32-SSE-NEXT: pushl %ebp
; X32-SSE-NEXT: movl %esp, %ebp
-; X32-SSE-NEXT: pushl %esi
; X32-SSE-NEXT: andl $-16, %esp
; X32-SSE-NEXT: subl $16, %esp
; X32-SSE-NEXT: movl 72(%ebp), %eax
; X32-SSE-NEXT: movl 76(%ebp), %ecx
-; X32-SSE-NEXT: movl 12(%ebp), %edx
; X32-SSE-NEXT: movdqa 56(%ebp), %xmm3
; X32-SSE-NEXT: movdqa 40(%ebp), %xmm4
; X32-SSE-NEXT: movdqa 24(%ebp), %xmm5
-; X32-SSE-NEXT: movl 8(%ebp), %esi
-; X32-SSE-NEXT: addps .LCPI0_0, %xmm0
-; X32-SSE-NEXT: movntps %xmm0, (%esi)
-; X32-SSE-NEXT: paddq .LCPI0_1, %xmm2
-; X32-SSE-NEXT: movntdq %xmm2, (%esi)
-; X32-SSE-NEXT: addpd .LCPI0_2, %xmm1
-; X32-SSE-NEXT: movntpd %xmm1, (%esi)
-; X32-SSE-NEXT: paddd .LCPI0_3, %xmm5
-; X32-SSE-NEXT: movntdq %xmm5, (%esi)
-; X32-SSE-NEXT: paddw .LCPI0_4, %xmm4
-; X32-SSE-NEXT: movntdq %xmm4, (%esi)
-; X32-SSE-NEXT: paddb .LCPI0_5, %xmm3
-; X32-SSE-NEXT: movntdq %xmm3, (%esi)
-; X32-SSE-NEXT: movntil %edx, (%esi)
-; X32-SSE-NEXT: movntil %ecx, 4(%esi)
-; X32-SSE-NEXT: movntil %eax, (%esi)
-; X32-SSE-NEXT: leal -4(%ebp), %esp
-; X32-SSE-NEXT: popl %esi
+; X32-SSE-NEXT: movl 8(%ebp), %edx
+; X32-SSE-NEXT: addps {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT: movntps %xmm0, (%edx)
+; X32-SSE-NEXT: paddq {{\.LCPI.*}}, %xmm2
+; X32-SSE-NEXT: movntdq %xmm2, (%edx)
+; X32-SSE-NEXT: addpd {{\.LCPI.*}}, %xmm1
+; X32-SSE-NEXT: movntpd %xmm1, (%edx)
+; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm5
+; X32-SSE-NEXT: movntdq %xmm5, (%edx)
+; X32-SSE-NEXT: paddw {{\.LCPI.*}}, %xmm4
+; X32-SSE-NEXT: movntdq %xmm4, (%edx)
+; X32-SSE-NEXT: paddb {{\.LCPI.*}}, %xmm3
+; X32-SSE-NEXT: movntdq %xmm3, (%edx)
+; X32-SSE-NEXT: movntil %ecx, 4(%edx)
+; X32-SSE-NEXT: movntil %eax, (%edx)
+; X32-SSE-NEXT: movl %ebp, %esp
; X32-SSE-NEXT: popl %ebp
; X32-SSE-NEXT: retl
;
; X32-AVX: # BB#0:
; X32-AVX-NEXT: pushl %ebp
; X32-AVX-NEXT: movl %esp, %ebp
-; X32-AVX-NEXT: pushl %esi
; X32-AVX-NEXT: andl $-16, %esp
; X32-AVX-NEXT: subl $16, %esp
; X32-AVX-NEXT: movl 72(%ebp), %eax
; X32-AVX-NEXT: movl 76(%ebp), %ecx
-; X32-AVX-NEXT: movl 12(%ebp), %edx
; X32-AVX-NEXT: vmovdqa 56(%ebp), %xmm3
; X32-AVX-NEXT: vmovdqa 40(%ebp), %xmm4
; X32-AVX-NEXT: vmovdqa 24(%ebp), %xmm5
-; X32-AVX-NEXT: movl 8(%ebp), %esi
-; X32-AVX-NEXT: vaddps .LCPI0_0, %xmm0, %xmm0
-; X32-AVX-NEXT: vmovntps %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddq .LCPI0_1, %xmm2, %xmm0
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: vaddpd .LCPI0_2, %xmm1, %xmm0
-; X32-AVX-NEXT: vmovntpd %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddd .LCPI0_3, %xmm5, %xmm0
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddw .LCPI0_4, %xmm4, %xmm0
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: vpaddb .LCPI0_5, %xmm3, %xmm0
-; X32-AVX-NEXT: vmovntdq %xmm0, (%esi)
-; X32-AVX-NEXT: movntil %edx, (%esi)
-; X32-AVX-NEXT: movntil %ecx, 4(%esi)
-; X32-AVX-NEXT: movntil %eax, (%esi)
-; X32-AVX-NEXT: leal -4(%ebp), %esp
-; X32-AVX-NEXT: popl %esi
+; X32-AVX-NEXT: movl 8(%ebp), %edx
+; X32-AVX-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-AVX-NEXT: vmovntps %xmm0, (%edx)
+; X32-AVX-NEXT: vpaddq {{\.LCPI.*}}, %xmm2, %xmm0
+; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT: vaddpd {{\.LCPI.*}}, %xmm1, %xmm0
+; X32-AVX-NEXT: vmovntpd %xmm0, (%edx)
+; X32-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm5, %xmm0
+; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT: vpaddw {{\.LCPI.*}}, %xmm4, %xmm0
+; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT: vpaddb {{\.LCPI.*}}, %xmm3, %xmm0
+; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT: movntil %ecx, 4(%edx)
+; X32-AVX-NEXT: movntil %eax, (%edx)
+; X32-AVX-NEXT: movl %ebp, %esp
; X32-AVX-NEXT: popl %ebp
; X32-AVX-NEXT: retl
;
@g_16 = internal global i32 -1
; X64-LABEL: test8:
-; X64-NEXT: movl _g_16(%rip), %eax
-; X64-NEXT: movl $0, _g_16(%rip)
-; X64-NEXT: orl $1, %eax
-; X64-NEXT: movl %eax, _g_16(%rip)
+; X64-NEXT: orb $1, _g_16(%rip)
; X64-NEXT: ret
define void @test8() nounwind {
%tmp = load i32, i32* @g_16
%v6 = extractvalue { i1, i1, i1, i1 } %call, 2
%v7 = extractvalue { i1, i1, i1, i1 } %call, 3
%val = zext i1 %v3 to i32
- store i32 %val, i32* @var
+ store volatile i32 %val, i32* @var
%val2 = zext i1 %v5 to i32
- store i32 %val2, i32* @var
+ store volatile i32 %val2, i32* @var
%val3 = zext i1 %v6 to i32
- store i32 %val3, i32* @var
+ store volatile i32 %val3, i32* @var
%val4 = zext i1 %v7 to i32
store i32 %val4, i32* @var
ret void
; Check that proper alignment of spilled vector does not affect vargs
; CHECK-LABEL: vargs_not_affected
-; CHECK: leal 28(%ebp), %eax
+; CHECK: movl 28(%ebp), %eax
define i32 @vargs_not_affected(<4 x float> %v, i8* %f, ...) {
entry:
%ap = alloca i8*, align 4
; LINUX: movq $0, -8(%rsp)
%this = alloca %Object addrspace(1)*
- store %Object addrspace(1)* null, %Object addrspace(1)** %this
- store %Object addrspace(1)* %param0, %Object addrspace(1)** %this
+ store volatile %Object addrspace(1)* null, %Object addrspace(1)** %this
+ store volatile %Object addrspace(1)* %param0, %Object addrspace(1)** %this
br label %0
; <label>:0 ; preds = %entry
; CHECK-LABEL: arg4:
; CHECK: pushq
-; va_start:
-; CHECK: leaq 48(%rsp), [[REG_arg4_1:%[a-z]+]]
-; CHECK: movq [[REG_arg4_1]], (%rsp)
+; va_start (its store is optimized away because it is overwritten by va_arg)
; va_arg:
; CHECK: leaq 52(%rsp), [[REG_arg4_2:%[a-z]+]]
; CHECK: movq [[REG_arg4_2]], (%rsp)
}
; CHECK-LABEL: arg4:
-; va_start:
-; CHECK: leaq 48(%rsp), [[REG_arg4_1:%[a-z]+]]
-; CHECK: movq [[REG_arg4_1]], (%rsp)
+; va_start (its store is optimized away because it is overwritten by va_arg)
; va_arg:
; CHECK: leaq 52(%rsp), [[REG_arg4_2:%[a-z]+]]
; CHECK: movq [[REG_arg4_2]], (%rsp)