From 737fa40ffad15482801cc60271aed40dd5de9c95 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 20 Feb 2018 23:19:34 +0000 Subject: [PATCH] [DSE] Don't DSE stores that subsequent memmove calls read from Summary: We used to remove the first memmove in cases like this: memmove(p, p+2, 8); memmove(p, p+2, 8); which is incorrect. Fix this by changing isPossibleSelfRead to what was most likely the intended behavior. Historical note: the buggy code was added in https://reviews.llvm.org/rL120974 to address PR8728. Reviewers: rsmith Subscribers: mcrosier, llvm-commits, jlebar Differential Revision: https://reviews.llvm.org/D43425 llvm-svn: 325641 --- .../lib/Transforms/Scalar/DeadStoreElimination.cpp | 43 +++++++++++------- .../test/Transforms/DeadStoreElimination/simple.ll | 51 ++++++++++++++++++++++ 2 files changed, 78 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index e6f0bb2..78bfa0e 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -510,8 +510,8 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later, /// memory region into an identical pointer) then it doesn't actually make its /// input dead in the traditional sense. Consider this case: /// -/// memcpy(A <- B) -/// memcpy(A <- A) +/// memmove(A <- B) +/// memmove(A <- A) /// /// In this case, the second store to A does not make the first store to A dead. /// The usual situation isn't an explicit A<-A store like this (which can be @@ -527,24 +527,35 @@ static bool isPossibleSelfRead(Instruction *Inst, // Self reads can only happen for instructions that read memory. Get the // location read. MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); - if (!InstReadLoc.Ptr) return false; // Not a reading instruction. + if (!InstReadLoc.Ptr) + return false; // Not a reading instruction. // If the read and written loc obviously don't alias, it isn't a read. - if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false; - - // Okay, 'Inst' may copy over itself. However, we can still remove a the - // DepWrite instruction if we can prove that it reads from the same location - // as Inst. This handles useful cases like: - // memcpy(A <- B) - // memcpy(A <- B) - // Here we don't know if A/B may alias, but we do know that B/B are must - // aliases, so removing the first memcpy is safe (assuming it writes <= # - // bytes as the second one. - MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); - - if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) + if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false; + if (isa(Inst)) { + // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763) + // but in practice memcpy(A <- B) either means that A and B are disjoint or + // are equal (i.e. there are not partial overlaps). Given that, if we have: + // + // memcpy/memmove(A <- B) // DepWrite + // memcpy(A <- B) // Inst + // + // with Inst reading/writing a >= size than DepWrite, we can reason as + // follows: + // + // - If A == B then both the copies are no-ops, so the DepWrite can be + // removed. + // - If A != B then A and B are disjoint locations in Inst. Since + // Inst.size >= DepWrite.size A and B are disjoint in DepWrite too. + // Therefore DepWrite can be removed. + MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); + + if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) + return false; + } + // If DepWrite doesn't read memory or if we can't prove it is a must alias, // then it can't be considered dead. return true; diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll index 6130fbb..6046211 100644 --- a/llvm/test/Transforms/DeadStoreElimination/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll @@ -252,6 +252,9 @@ define void @test17v(i8* %P, i8* %Q) nounwind ssp { ; Do not delete instruction where possible situation is: ; A = B ; A = A +; +; NB! See PR11763 - currently LLVM allows memcpy's source and destination to be +; equal (but not inequal and overlapping). define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp { tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) @@ -521,3 +524,51 @@ define void @test35(i32* noalias %p) { store i32 0, i32* %p ret void } + +; We cannot optimize away the first memmove since %P could overlap with %Q. +define void @test36(i8* %P, i8* %Q) { +; CHECK-LABEL: @test36( +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK-NEXT: ret + + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + ret void +} + +define void @test37(i8* %P, i8* %Q, i8* %R) { +; CHECK-LABEL: @test37( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK-NEXT: ret + + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) + ret void +} + +; Same caveat about memcpy as in @test18 applies here. +define void @test38(i8* %P, i8* %Q, i8* %R) { +; CHECK-LABEL: @test38( +; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK-NEXT: ret + + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) + ret void +} + +define void @test39(i8* %P, i8* %Q, i8* %R) { +; CHECK-LABEL: @test39( +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false) +; CHECK-NEXT: ret + + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false) + ret void +} + +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) -- 2.7.4