if (!hasAnalyzableMemoryWrite(NI, TLI))
break;
+
+ if (!isRemovable(NI)) {
+ LLVM_DEBUG(dbgs() << " skip, cannot remove def\n");
+ continue;
+ }
+
MemoryLocation NILoc = *State.getLocForWriteEx(NI);
// Check for anything that looks like it will be a barrier to further
// removal
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -dse -enable-dse-memoryssa %s -S | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-Fi8-i64:64-a:0:32-n32-S128"
+
+define void @widget(i8* %ptr) {
+; CHECK-LABEL: @widget(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i32 4
+; CHECK-NEXT: [[PTR1_CAST:%.*]] = bitcast i8* [[PTR1]] to i32*
+; CHECK-NEXT: store atomic i32 0, i32* [[PTR1_CAST]] monotonic, align 4
+; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i32 0
+; CHECK-NEXT: [[PTR2_CAST:%.*]] = bitcast i8* [[PTR2]] to i64**
+; CHECK-NEXT: store i64* null, i64** [[PTR2_CAST]], align 4
+; CHECK-NEXT: ret void
+;
+; The CHECK lines above require BOTH stores to survive DSE: the first store is
+; atomic monotonic (not a plain removable store), and the two writes cover
+; disjoint 4-byte ranges (i32 at offset 4 vs. a pointer at offset 0; pointers
+; are 32-bit per the datalayout in this file).
+bb:
+ %ptr1 = getelementptr inbounds i8, i8* %ptr, i32 4
+ %ptr1.cast = bitcast i8* %ptr1 to i32*
+ store atomic i32 0, i32* %ptr1.cast monotonic, align 4
+ %ptr2 = getelementptr inbounds i8, i8* %ptr, i32 0
+ %ptr2.cast = bitcast i8* %ptr2 to i64**
+ store i64* null, i64** %ptr2.cast, align 4
+ ret void
+}
--- /dev/null
+; XFAIL: *
+; RUN: opt -basicaa -dse -enable-dse-memoryssa -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; Sanity tests for atomic stores.
+; Note that essentially every transformation DSE performs is legal on atomic
+; ops; only transformations that cross a release-acquire pair are disallowed.
+
+@x = common global i32 0, align 4
+@y = common global i32 0, align 4
+
+; DSE no-op unordered atomic store (allowed)
+define void @test6() {
+; CHECK-LABEL: test6
+; CHECK-NOT: store
+; CHECK: ret void
+; The store writes back the unordered value just loaded from @x, so DSE may
+; delete it as a no-op store (CHECK-NOT above expects it to be gone).
+ %x = load atomic i32, i32* @x unordered, align 4
+ store atomic i32 %x, i32* @x unordered, align 4
+ ret void
+}
+
+; DSE across monotonic load (allowed as long as the eliminated store isUnordered)
+define i32 @test9() {
+; CHECK-LABEL: test9
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+; The first (unordered) store to @x is fully overwritten by the second store;
+; the intervening monotonic load reads the unrelated global @y, so per the
+; CHECK lines it must not block elimination of the dead store.
+ store i32 0, i32* @x
+ %x = load atomic i32, i32* @y monotonic, align 4
+ store i32 1, i32* @x
+ ret i32 %x
+}
+
+; DSE across monotonic store (allowed as long as the eliminated store isUnordered)
+define void @test10() {
+; CHECK-LABEL: test10
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+; Same shape as test9, but the intervening atomic operation is a monotonic
+; STORE to the unrelated global @y; the dead unordered store of 0 to @x must
+; still be eliminated (CHECK-NOT above).
+ store i32 0, i32* @x
+ store atomic i32 42, i32* @y monotonic, align 4
+ store i32 1, i32* @x
+ ret void
+}
-; XFAIL: *
; RUN: opt -basicaa -dse -enable-dse-memoryssa -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
ret void
}
-; DSE no-op unordered atomic store (allowed)
-define void @test6() {
-; CHECK-LABEL: test6
-; CHECK-NOT: store
-; CHECK: ret void
- %x = load atomic i32, i32* @x unordered, align 4
- store atomic i32 %x, i32* @x unordered, align 4
- ret void
-}
-
; DSE seq_cst store (be conservative; DSE doesn't have infrastructure
; to reason about atomic operations).
define void @test7() {
ret i32 %x
}
-; DSE across monotonic load (allowed as long as the eliminated store isUnordered)
-define i32 @test9() {
-; CHECK-LABEL: test9
-; CHECK-NOT: store i32 0
-; CHECK: store i32 1
- store i32 0, i32* @x
- %x = load atomic i32, i32* @y monotonic, align 4
- store i32 1, i32* @x
- ret i32 %x
-}
-
-; DSE across monotonic store (allowed as long as the eliminated store isUnordered)
-define void @test10() {
-; CHECK-LABEL: test10
-; CHECK-NOT: store i32 0
-; CHECK: store i32 1
- store i32 0, i32* @x
- store atomic i32 42, i32* @y monotonic, align 4
- store i32 1, i32* @x
- ret void
-}
-
; DSE across monotonic load (forbidden since the eliminated store is atomic)
define i32 @test11() {
; CHECK-LABEL: test11
; Should not delete the volatile memset.
define void @test17v(i8* %P, i8* %Q) nounwind ssp {
; CHECK-LABEL: @test17v(
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 42, i64 8, i1 true)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT: ret void
;
tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true)