From 9a45e4beede24fdcd6b7d95416d0447ba154651c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 13 Oct 2022 16:45:37 +0200 Subject: [PATCH] [MemCpyOpt] Move lifetime marker before call to enable call slot optimization Currently call slot optimization may be prevented because the lifetime markers for the destination only start after the call. In this case, rather than aborting the transform, we should move the lifetime.start before the call to enable the transform. Differential Revision: https://reviews.llvm.org/D135886 --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 28 +++++++++++++++++++++----- llvm/test/Transforms/MemCpyOpt/lifetime.ll | 3 +-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 387b71d..43259cb 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -331,16 +331,27 @@ void MemCpyOptPass::eraseInstruction(Instruction *I) { } // Check for mod or ref of Loc between Start and End, excluding both boundaries. -// Start and End must be in the same block +// Start and End must be in the same block. +// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start +// intrinsic and store it inside SkippedLifetimeStart. 
static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, - const MemoryUseOrDef *End) { + const MemoryUseOrDef *End, + Instruction **SkippedLifetimeStart = nullptr) { assert(Start->getBlock() == End->getBlock() && "Only local supported"); for (const MemoryAccess &MA : make_range(++Start->getIterator(), End->getIterator())) { - if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(), - Loc))) + Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst(); + if (isModOrRefSet(AA.getModRefInfo(I, Loc))) { + auto *II = dyn_cast<IntrinsicInst>(I); + if (II && II->getIntrinsicID() == Intrinsic::lifetime_start && + SkippedLifetimeStart && !*SkippedLifetimeStart) { + *SkippedLifetimeStart = I; + continue; + } + return true; + } } return false; } @@ -913,8 +924,9 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Check that nothing touches the dest of the copy between // the call and the store/memcpy. + Instruction *SkippedLifetimeStart = nullptr; if (accessedBetween(*AA, DestLoc, MSSA->getMemoryAccess(C), - MSSA->getMemoryAccess(cpyStore))) { + MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) { LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n"); return false; } @@ -1094,6 +1106,12 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, cast<AllocaInst>(cpyDest)->setAlignment(srcAlign); } + if (SkippedLifetimeStart) { + SkippedLifetimeStart->moveBefore(C); + MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart), + MSSA->getMemoryAccess(C)); + } + // Update AA metadata // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be // handled here, but combineMetadata doesn't support them yet diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll index 77b94a7..58e4ab8 100644 --- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -55,9 +55,8 @@ define i32 @call_slot_move_lifetime_start() { ; CHECK-LABEL: 
@call_slot_move_lifetime_start( ; CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DST:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @call(ptr [[TMP]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DST]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DST]], ptr align 4 [[TMP]], i64 4, i1 false) +; CHECK-NEXT: call void @call(ptr [[DST]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DST]]) ; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[DST]], align 4 ; CHECK-NEXT: ret i32 [[V]] -- 2.7.4