if (isInvariantLoad)
continue;
+ // A release fence requires that all stores complete before it, but does
+ // not prevent the reordering of following loads or stores 'before' the
+ // fence. As a result, we look past it when finding a dependency for
+ // loads. DSE uses this to find preceeding stores to delete and thus we
+ // can't bypass the fence if the query instruction is a store.
+ if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+ if (isLoad && FI->getOrdering() == Release)
+ continue;
+
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
// If necessary, perform additional analysis.
--- /dev/null
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
+
+; We conservative choose to prevent dead store elimination
+; across release or stronger fences. It's not required
+; (since the must still be a race on %addd.i), but
+; it is conservatively correct. A legal optimization
+; could hoist the second store above the fence, and then
+; DSE one of them.
+define void @test1(i32* %addr.i) {
+; CHECK-LABEL: @test1
+; CHECK: store i32 5
+; CHECK: fence
+; CHECK: store i32 5
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence release
+ store i32 5, i32* %addr.i, align 4
+ ret void
+}
+
+; Same as previous, but with different values. If we ever optimize
+; this more aggressively, this allows us to check that the correct
+; store is retained (the 'i32 1' store in this case)
+define void @test1b(i32* %addr.i) {
+; CHECK-LABEL: @test1b
+; CHECK: store i32 42
+; CHECK: fence release
+; CHECK: store i32 1
+; CHECK: ret
+ store i32 42, i32* %addr.i, align 4
+ fence release
+ store i32 1, i32* %addr.i, align 4
+ ret void
+}
+
+; We *could* DSE across this fence, but don't. No other thread can
+; observe the order of the acquire fence and the store.
+define void @test2(i32* %addr.i) {
+; CHECK-LABEL: @test2
+; CHECK: store
+; CHECK: fence
+; CHECK: store
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence acquire
+ store i32 5, i32* %addr.i, align 4
+ ret void
+}
--- /dev/null
+; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+
+; We can value forward across the fence since we can (semantically)
+; reorder the following load before the fence.
+define i32 @test(i32* %addr.i) {
+; CHECK-LABEL: @test
+; CHECK: store
+; CHECK: fence
+; CHECK-NOT: load
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence release
+ %a = load i32, i32* %addr.i, align 4
+ ret i32 %a
+}
+
+; Same as above
+define i32 @test2(i32* %addr.i) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: fence
+; CHECK-NOT: load
+; CHECK: ret
+ %a = load i32, i32* %addr.i, align 4
+ fence release
+ %a2 = load i32, i32* %addr.i, align 4
+ %res = sub i32 %a, %a2
+ ret i32 %res
+}
+
+; We can not value forward across an acquire barrier since we might
+; be syncronizing with another thread storing to the same variable
+; followed by a release fence. This is not so much enforcing an
+; ordering property (though it is that too), but a liveness
+; property. We expect to eventually see the value of store by
+; another thread when spinning on that location.
+define i32 @test3(i32* noalias %addr.i, i32* noalias %otheraddr) {
+; CHECK-LABEL: @test3
+; CHECK: load
+; CHECK: fence
+; CHECK: load
+; CHECK: ret i32 %res
+ ; the following code is intented to model the unrolling of
+ ; two iterations in a spin loop of the form:
+ ; do { fence acquire: tmp = *%addr.i; ) while (!tmp);
+ ; It's hopefully clear that allowing PRE to turn this into:
+ ; if (!*%addr.i) while(true) {} would be unfortunate
+ fence acquire
+ %a = load i32, i32* %addr.i, align 4
+ fence acquire
+ %a2 = load i32, i32* %addr.i, align 4
+ %res = sub i32 %a, %a2
+ ret i32 %res
+}
+
+; Another example of why forwarding across an acquire fence is problematic
+; can be seen in a normal locking operation. Say we had:
+; *p = 5; unlock(l); lock(l); use(p);
+; forwarding the store to p would be invalid. A reasonable implementation
+; of unlock and lock might be:
+; unlock() { atomicrmw sub %l, 1 unordered; fence release }
+; lock() {
+; do {
+; %res = cmpxchg %p, 0, 1, monotonic monotonic
+; } while(!%res.success)
+; fence acquire;
+; }
+; Given we chose to forward across the release fence, we clearly can't forward
+; across the acquire fence as well.
+