From f801205e4835598da60fcb4f5721ccd0cc323a32 Mon Sep 17 00:00:00 2001 From: Piotr Padlewski Date: Wed, 2 May 2018 08:22:07 +0000 Subject: [PATCH] Mark invariant.group.barrier as inaccessiblememonly It turned out that readonly argmemonly is not enough. store 42, %p %b = barrier(%p) store 43, %b the first store is dead, but because barrier was marked as reading argument memory, it was considered alive. With inaccessiblememonly it doesn't read the argument, but it also can't be CSEd. based on: https://reviews.llvm.org/D32006 llvm-svn: 331338 --- llvm/include/llvm/IR/Intrinsics.td | 11 +- llvm/test/Analysis/MemorySSA/invariant-groups.ll | 141 ++++++++++++++++------- llvm/test/Other/invariant.group.barrier.ll | 29 ++++- 3 files changed, 133 insertions(+), 48 deletions(-) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 918fbc5..b98f587 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -712,16 +712,19 @@ def int_invariant_end : Intrinsic<[], // invariant.group.barrier can't be marked with 'readnone' (IntrNoMem), // because it would cause CSE of two barriers with the same argument. -// Readonly and argmemonly says that barrier only reads its argument and -// it can be CSE only if memory didn't change between 2 barriers call, -// which is valid. +// Inaccessiblememonly says that the barrier doesn't read the argument, +// but it changes state not accessible to this module. This way +// we can DSE through the barrier because it doesn't read the value +// after store. Although the barrier doesn't modify any memory it +// can't be marked as readonly, because it would be possible to +// CSE 2 barriers with store in between. // The argument also can't be marked with 'returned' attribute, because // it would remove barrier. // Note that it is still experimental, which means that its semantics // might change in the future. 
def int_invariant_group_barrier : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], - [IntrReadMem, IntrArgMemOnly]>; + [IntrInaccessibleMemOnly]>; //===------------------------ Stackmap Intrinsics -------------------------===// // diff --git a/llvm/test/Analysis/MemorySSA/invariant-groups.ll b/llvm/test/Analysis/MemorySSA/invariant-groups.ll index eb8ad96..062b574 100644 --- a/llvm/test/Analysis/MemorySSA/invariant-groups.ll +++ b/llvm/test/Analysis/MemorySSA/invariant-groups.ll @@ -16,7 +16,7 @@ define i32 @foo(i32* %a) { store i32 1, i32* @g, align 4 %1 = bitcast i32* %a to i8* -; CHECK: MemoryUse(2) +; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -35,9 +35,9 @@ define i32 @skipBarrier(i32* %a) { store i32 0, i32* %a, align 4, !invariant.group !0 %1 = bitcast i32* %a to i8* -; CHECK: MemoryUse(1) +; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) - %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) + %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a32 = bitcast i8* %a8 to i32* ; We can skip the barrier only if the "skip" is not based on !invariant.group. 
@@ -54,7 +54,7 @@ define i32 @skipBarrier2(i32* %a) { %v = load i32, i32* %a, align 4, !invariant.group !0 %1 = bitcast i32* %a to i8* -; CHECK: MemoryUse(liveOnEntry) +; CHECK: 1 = MemoryDef(liveOnEntry) ; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -63,12 +63,12 @@ define i32 @skipBarrier2(i32* %a) { ; CHECK: MemoryUse(liveOnEntry) ; CHECK-NEXT: %v2 = load i32 %v2 = load i32, i32* %a32, align 4, !invariant.group !0 -; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: store i32 1 store i32 1, i32* @g, align 4 ; FIXME: based on invariant.group it should be MemoryUse(liveOnEntry) -; CHECK: MemoryUse(1) +; CHECK: MemoryUse(2) ; CHECK-NEXT: %v3 = load i32 %v3 = load i32, i32* %a32, align 4, !invariant.group !0 %add = add nsw i32 %v2, %v3 @@ -85,7 +85,7 @@ define i32 @handleInvariantGroups(i32* %a) { ; CHECK-NEXT: store i32 1 store i32 1, i32* @g, align 4 %1 = bitcast i32* %a to i8* -; CHECK: MemoryUse(2) +; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -94,12 +94,12 @@ define i32 @handleInvariantGroups(i32* %a) { ; CHECK-NEXT: %2 = load i32 %2 = load i32, i32* %a32, align 4, !invariant.group !0 -; CHECK: 3 = MemoryDef(2) +; CHECK: 4 = MemoryDef(3) ; CHECK-NEXT: store i32 2 store i32 2, i32* @g, align 4 ; FIXME: This can be changed to MemoryUse(2) -; CHECK: MemoryUse(3) +; CHECK: MemoryUse(4) ; CHECK-NEXT: %3 = load i32 %3 = load i32, i32* %a32, align 4, !invariant.group !0 %add = add nsw i32 %2, %3 @@ -144,35 +144,35 @@ entry: ; CHECK-NEXT: call void @clobber call void @clobber8(i8* %p) -; CHECK: MemoryUse(2) +; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p) %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p) br i1 undef, label 
%Loop.Body, label %Loop.End Loop.Body: -; 4 = MemoryPhi({entry,2},{Loop.Body,3},{Loop.End,5}) -; CHECK: MemoryUse(4) +; 5 = MemoryPhi({entry,3},{Loop.Body,4},{Loop.End,6}) +; CHECK: MemoryUse(5) ; CHECK-NEXT: %0 = load i8 %0 = load i8, i8* %after, !invariant.group !0 ; FIXME: MemoryUse(1) -; CHECK: MemoryUse(4) +; CHECK: MemoryUse(5) ; CHECK-NEXT: %1 = load i8 %1 = load i8, i8* %p, !invariant.group !0 -; CHECK: 3 = MemoryDef(4) +; CHECK: 4 = MemoryDef(5) store i8 4, i8* %after, !invariant.group !0 br i1 undef, label %Loop.End, label %Loop.Body Loop.End: -; 5 = MemoryPhi({entry,2},{Loop.Body,3}) -; CHECK: MemoryUse(5) +; 6 = MemoryPhi({entry,3},{Loop.Body,4}) +; CHECK: MemoryUse(6) ; CHECK-NEXT: %2 = load %2 = load i8, i8* %after, align 4, !invariant.group !0 ; FIXME: MemoryUse(1) -; CHECK: MemoryUse(5) +; CHECK: MemoryUse(6) ; CHECK-NEXT: %3 = load %3 = load i8, i8* %p, align 4, !invariant.group !0 br i1 undef, label %Ret, label %Loop.Body @@ -191,51 +191,51 @@ entry: ; CHECK-NEXT: call void @clobber call void @clobber8(i8* %p) -; CHECK: MemoryUse(2) +; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p) %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p) br i1 undef, label %Loop.Body, label %Loop.End Loop.Body: -; CHECK: 6 = MemoryPhi({entry,2},{Loop.Body,3},{Loop.next,4},{Loop.End,5}) -; CHECK: MemoryUse(6) +; CHECK: 7 = MemoryPhi({entry,3},{Loop.Body,4},{Loop.next,5},{Loop.End,6}) +; CHECK: MemoryUse(7) ; CHECK-NEXT: %0 = load i8 %0 = load i8, i8* %after, !invariant.group !0 -; CHECK: 3 = MemoryDef(6) +; CHECK: 4 = MemoryDef(7) ; CHECK-NEXT: call void @clobber8 call void @clobber8(i8* %after) -; FIXME: MemoryUse(6) -; CHECK: MemoryUse(3) +; FIXME: MemoryUse(7) +; CHECK: MemoryUse(4) ; CHECK-NEXT: %1 = load i8 %1 = load i8, i8* %after, !invariant.group !0 br i1 undef, label %Loop.next, label %Loop.Body Loop.next: -; CHECK: 4 = MemoryDef(3) +; CHECK: 5 = MemoryDef(4) ; CHECK-NEXT: call void @clobber8 call 
void @clobber8(i8* %after) -; FIXME: MemoryUse(6) -; CHECK: MemoryUse(4) +; FIXME: MemoryUse(7) +; CHECK: MemoryUse(5) ; CHECK-NEXT: %2 = load i8 %2 = load i8, i8* %after, !invariant.group !0 br i1 undef, label %Loop.End, label %Loop.Body Loop.End: -; CHECK: 7 = MemoryPhi({entry,2},{Loop.next,4}) -; CHECK: MemoryUse(7) +; CHECK: 8 = MemoryPhi({entry,3},{Loop.next,5}) +; CHECK: MemoryUse(8) ; CHECK-NEXT: %3 = load %3 = load i8, i8* %after, align 4, !invariant.group !0 -; CHECK: 5 = MemoryDef(7) +; CHECK: 6 = MemoryDef(8) ; CHECK-NEXT: call void @clobber8 call void @clobber8(i8* %after) -; FIXME: MemoryUse(7) -; CHECK: MemoryUse(5) +; FIXME: MemoryUse(8) +; CHECK: MemoryUse(6) ; CHECK-NEXT: %4 = load %4 = load i8, i8* %after, align 4, !invariant.group !0 br i1 undef, label %Ret, label %Loop.Body @@ -252,7 +252,7 @@ entry: ; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: call void @clobber call void @clobber8(i8* %p) -; CHECK: MemoryUse(2) +; CHECK: 3 = MemoryDef(2) ; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p) %after = call i8* @llvm.invariant.group.barrier.p0i8(i8* %p) br i1 undef, label %Loop.Pre, label %Loop.End @@ -263,28 +263,28 @@ Loop.Pre: %0 = load i8, i8* %after, !invariant.group !0 br label %Loop.Body Loop.Body: -; CHECK: 4 = MemoryPhi({Loop.Pre,2},{Loop.Body,3},{Loop.End,5}) -; CHECK-NEXT: MemoryUse(4) +; CHECK: 5 = MemoryPhi({Loop.Pre,3},{Loop.Body,4},{Loop.End,6}) +; CHECK-NEXT: MemoryUse(5) ; CHECK-NEXT: %1 = load i8 %1 = load i8, i8* %after, !invariant.group !0 ; FIXME: MemoryUse(2) -; CHECK: MemoryUse(4) +; CHECK: MemoryUse(5) ; CHECK-NEXT: %2 = load i8 %2 = load i8, i8* %p, !invariant.group !0 -; CHECK: 3 = MemoryDef(4) +; CHECK: 4 = MemoryDef(5) store i8 4, i8* %after, !invariant.group !0 br i1 undef, label %Loop.End, label %Loop.Body Loop.End: -; CHECK: 5 = MemoryPhi({entry,2},{Loop.Body,3}) -; CHECK-NEXT: MemoryUse(5) +; CHECK: 6 = MemoryPhi({entry,3},{Loop.Body,4}) +; CHECK-NEXT: MemoryUse(6) ; CHECK-NEXT: %3 = load %3 = load 
i8, i8* %after, align 4, !invariant.group !0 ; FIXME: MemoryUse(2) -; CHECK: MemoryUse(5) +; CHECK: MemoryUse(6) ; CHECK-NEXT: %4 = load %4 = load i8, i8* %p, align 4, !invariant.group !0 br i1 undef, label %Ret, label %Loop.Body @@ -293,9 +293,70 @@ Ret: ret i8 %3 } +; In the future we would like to CSE barriers if there is no clobber between. +; CHECK-LABEL: define i8 @optimizable() +define i8 @optimizable() { +entry: + %ptr = alloca i8 +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i8 42, i8* %ptr, !invariant.group !0 + store i8 42, i8* %ptr, !invariant.group !0 +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: call i8* @llvm.invariant.group.barrier + %ptr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) +; FIXME: This one could be CSEd. +; CHECK: 3 = MemoryDef(2) +; CHECK: call i8* @llvm.invariant.group.barrier + %ptr3 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) +; CHECK: 4 = MemoryDef(3) +; CHECK-NEXT: call void @clobber8(i8* %ptr) + call void @clobber8(i8* %ptr) +; CHECK: 5 = MemoryDef(4) +; CHECK-NEXT: call void @use(i8* %ptr2) + call void @use(i8* %ptr2) +; CHECK: 6 = MemoryDef(5) +; CHECK-NEXT: call void @use(i8* %ptr3) + call void @use(i8* %ptr3) +; CHECK: MemoryUse(6) +; CHECK-NEXT: load i8, i8* %ptr3, {{.*}}!invariant.group + %v = load i8, i8* %ptr3, !invariant.group !0 + + ret i8 %v +} + +; CHECK-LABEL: define i8 @unoptimizable2() +define i8 @unoptimizable2() { + %ptr = alloca i8 +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i8 42, i8* %ptr, !invariant.group !0 + store i8 42, i8* %ptr, !invariant.group !0 +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: call i8* @llvm.invariant.group.barrier + %ptr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) +; CHECK: 3 = MemoryDef(2) + store i8 43, i8* %ptr +; CHECK: 4 = MemoryDef(3) +; CHECK-NEXT: call i8* @llvm.invariant.group.barrier + %ptr3 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) +; CHECK: 5 = MemoryDef(4) +; CHECK-NEXT: call void @clobber8(i8* %ptr) + call 
void @clobber8(i8* %ptr) +; CHECK: 6 = MemoryDef(5) +; CHECK-NEXT: call void @use(i8* %ptr2) + call void @use(i8* %ptr2) +; CHECK: 7 = MemoryDef(6) +; CHECK-NEXT: call void @use(i8* %ptr3) + call void @use(i8* %ptr3) +; CHECK: MemoryUse(7) +; CHECK-NEXT: %v = load i8, i8* %ptr3, !invariant.group !0 + %v = load i8, i8* %ptr3, !invariant.group !0 + ret i8 %v +} + + declare i8* @llvm.invariant.group.barrier.p0i8(i8*) declare void @clobber(i32*) declare void @clobber8(i8*) - +declare void @use(i8* readonly) !0 = !{!"group1"} diff --git a/llvm/test/Other/invariant.group.barrier.ll b/llvm/test/Other/invariant.group.barrier.ll index f10e4a1..5ba4fcc 100644 --- a/llvm/test/Other/invariant.group.barrier.ll +++ b/llvm/test/Other/invariant.group.barrier.ll @@ -14,16 +14,17 @@ entry: store i8 42, i8* %ptr, !invariant.group !0 ; CHECK: call i8* @llvm.invariant.group.barrier.p0i8 %ptr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) -; CHECK-NOT: call i8* @llvm.invariant.group.barrier.p0i8 +; FIXME: This one could be CSEd +; CHECK: call i8* @llvm.invariant.group.barrier %ptr3 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) ; CHECK: call void @clobber(i8* {{.*}}%ptr) call void @clobber(i8* %ptr) ; CHECK: call void @use(i8* {{.*}}%ptr2) call void @use(i8* %ptr2) -; CHECK: call void @use(i8* {{.*}}%ptr2) +; CHECK: call void @use(i8* {{.*}}%ptr3) call void @use(i8* %ptr3) -; CHECK: load i8, i8* %ptr2, {{.*}}!invariant.group +; CHECK: load i8, i8* %ptr3, {{.*}}!invariant.group %v = load i8, i8* %ptr3, !invariant.group !0 ret i8 %v @@ -51,10 +52,30 @@ entry: ret i8 %v } +; CHECK-LABEL: define i8 @unoptimizable2() +define i8 @unoptimizable2() { + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 +; CHECK: call i8* @llvm.invariant.group.barrier + %ptr2 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) + store i8 43, i8* %ptr +; CHECK: call i8* @llvm.invariant.group.barrier + %ptr3 = call i8* @llvm.invariant.group.barrier.p0i8(i8* %ptr) +; CHECK: call void
@clobber(i8* {{.*}}%ptr) + call void @clobber(i8* %ptr) +; CHECK: call void @use(i8* {{.*}}%ptr2) + call void @use(i8* %ptr2) +; CHECK: call void @use(i8* {{.*}}%ptr3) + call void @use(i8* %ptr3) +; CHECK: load i8, i8* %ptr3, {{.*}}!invariant.group + %v = load i8, i8* %ptr3, !invariant.group !0 + ret i8 %v +} + declare void @use(i8* readonly) declare void @clobber(i8*) -; CHECK: Function Attrs: argmemonly nounwind readonly +; CHECK: Function Attrs: inaccessiblememonly nounwind{{$}} ; CHECK-NEXT: declare i8* @llvm.invariant.group.barrier.p0i8(i8*) declare i8* @llvm.invariant.group.barrier.p0i8(i8*) -- 2.7.4