From 36bbd68667dbe46626982d3a150891fb85bcc9a8 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Wed, 26 Oct 2022 19:54:26 -0700 Subject: [PATCH] [InstCombine] Allow memcpys from constant memory to readonly nocapture parameters to be elided. Currently, InstCombine can elide a memcpy from a constant to a local alloca if that alloca is passed as a nocapture parameter to a *function* that's readnone or readonly, but it can't forward the memcpy if the *argument* is marked readonly nocapture, even though readonly guarantees that the callee won't mutate the pointee through that pointer. This patch adds support for detecting and handling such situations, which arise relatively frequently in Rust, a frontend that liberally emits readonly. A more general version of this optimization would use alias analysis to check the call's ModRef info for the pointee, but I was concerned about blowing up compile time, so for now I'm just checking for one of readnone on the function, readonly on the function, or readonly on the parameter. Differential Revision: https://reviews.llvm.org/D136822 --- .../Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 13 +++++++------ llvm/test/Transforms/InstCombine/memcpy-from-global.ll | 8 +++----- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 23f6207..9acfa61 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -39,8 +39,8 @@ STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); /// the alloca, and if the source pointer is a pointer to a constant global, we /// can optimize this. 
static bool -isOnlyCopiedFromConstantMemory(AAResults *AA, - Value *V, MemTransferInst *&TheCopy, +isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V, + MemTransferInst *&TheCopy, SmallVectorImpl<Instruction *> &ToDelete) { // We track lifetime intrinsics as we encounter them. If we decide to go // ahead and replace the value with the global, this lets the caller quickly @@ -85,11 +85,12 @@ isOnlyCopiedFromConstantMemory(AAResults *AA, if (IsArgOperand && Call->isInAllocaArgument(DataOpNo)) return false; - // If this is a readonly/readnone call site, then we know it is just a - // load (but one that potentially returns the value itself), so we can + // If this call site doesn't modify the memory, then we know it is just + // a load (but one that potentially returns the value itself), so we can // ignore it if we know that the value isn't captured. - if (Call->onlyReadsMemory() && - (Call->use_empty() || Call->doesNotCapture(DataOpNo))) + bool NoCapture = Call->doesNotCapture(DataOpNo); + if ((Call->onlyReadsMemory() && (Call->use_empty() || NoCapture)) || + (Call->onlyReadsMemory(DataOpNo) && NoCapture)) continue; // If this is being passed as a byval argument, the caller is making a diff --git a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll index 128c68c..cd6dfc4 100644 --- a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll +++ b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll @@ -380,13 +380,11 @@ define void @volatile_memcpy() { ret void } -; Test that we don't yet elide a memcpy when copying a constant value onto the -; stack and then forwarding it by readonly nocapture reference. +; Test that we can elide a memcpy when copying a constant value onto the stack +; and then forwarding it by readonly nocapture reference. 
define void @memcpy_to_nocapture_readonly() { ; CHECK-LABEL: @memcpy_to_nocapture_readonly( -; CHECK-NEXT: [[A:%.*]] = alloca [[U:%.*]], align 16 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(20) [[A]], ptr noundef nonnull align 16 dereferenceable(20) @H, i64 20, i1 false) -; CHECK-NEXT: call void @bar(ptr nocapture nonnull readonly [[A]]) +; CHECK-NEXT: call void @bar(ptr nocapture nonnull readonly @H) ; CHECK-NEXT: ret void ; %A = alloca %U, align 16 -- 2.7.4