// Second, the length of the memcpy's must be the same, or the preceding one
// must be larger than the following one.
- ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
- ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
- if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
- return false;
+ if (MDep->getLength() != M->getLength()) {
+ ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+ ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+ if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
+ return false;
+ }
// Verify that the copied-from memory doesn't change in between the two
// transfers. For example, in:
/// Determine whether the instruction has undefined content for the given Size,
/// either because it was freshly alloca'd or started its lifetime.
-static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
+static bool hasUndefContents(Instruction *I, Value *Size) {
if (isa<AllocaInst>(I))
return true;
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- if (II->getIntrinsicID() == Intrinsic::lifetime_start)
- if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
- if (LTSize->getZExtValue() >= Size->getZExtValue())
- return true;
+ if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
+ if (LTSize->getZExtValue() >= CSize->getZExtValue())
+ return true;
+ }
return false;
}
static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
- MemoryDef *Def, ConstantInt *Size) {
+ MemoryDef *Def, Value *Size) {
if (MSSA->isLiveOnEntryDef(Def))
return isa<AllocaInst>(getUnderlyingObject(V));
dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
- if (AA->isMustAlias(V, II->getArgOperand(1)) &&
- LTSize->getZExtValue() >= Size->getZExtValue())
- return true;
- // If the lifetime.start covers a whole alloca (as it almost always does)
- // and we're querying a pointer based on that alloca, then we know the
- // memory is definitely undef, regardless of how exactly we alias. The
- // size also doesn't matter, as an out-of-bounds access would be UB.
+ if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
+ if (AA->isMustAlias(V, II->getArgOperand(1)) &&
+ LTSize->getZExtValue() >= CSize->getZExtValue())
+ return true;
+ }
+
+ // If the lifetime.start covers a whole alloca (as it almost always
+ // does) and we're querying a pointer based on that alloca, then we know
+ // the memory is definitely undef, regardless of how exactly we alias.
+ // The size also doesn't matter, as an out-of-bounds access would be UB.
AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V));
if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
DataLayout DL = Alloca->getModule()->getDataLayout();
/// memset(dst2, c, dst2_size);
/// \endcode
/// When dst2_size <= dst1_size.
-///
-/// The \p MemCpy must have a Constant length.
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
MemSetInst *MemSet) {
// Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
return false;
- // A known memset size is required.
- ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
- if (!MemSetSize)
- return false;
+ Value *MemSetSize = MemSet->getLength();
+ Value *CopySize = MemCpy->getLength();
- // Make sure the memcpy doesn't read any more than what the memset wrote.
- // Don't worry about sizes larger than i64.
- ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
- if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
- // If the memcpy is larger than the memset, but the memory was undef prior
- // to the memset, we can just ignore the tail. Technically we're only
- // interested in the bytes from MemSetSize..CopySize here, but as we can't
- // easily represent this location, we use the full 0..CopySize range.
- MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
- bool CanReduceSize = false;
- if (EnableMemorySSA) {
- MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- MemSetAccess->getDefiningAccess(), MemCpyLoc);
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
- if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
- CanReduceSize = true;
- } else {
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
- if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
- CanReduceSize = true;
- }
+ if (MemSetSize != CopySize) {
+ // Make sure the memcpy doesn't read any more than what the memset wrote.
+ // Don't worry about sizes larger than i64.
+
+ // A known memset size is required.
+ ConstantInt *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
+ if (!CMemSetSize)
+ return false;
- if (!CanReduceSize)
+ // A known memcpy size is also required.
+ ConstantInt *CCopySize = dyn_cast<ConstantInt>(CopySize);
+ if (!CCopySize)
return false;
- CopySize = MemSetSize;
+ if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
+ // If the memcpy is larger than the memset, but the memory was undef prior
+ // to the memset, we can just ignore the tail. Technically we're only
+ // interested in the bytes from MemSetSize..CopySize here, but as we can't
+ // easily represent this location, we use the full 0..CopySize range.
+ MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
+ bool CanReduceSize = false;
+ if (EnableMemorySSA) {
+ MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ MemSetAccess->getDefiningAccess(), MemCpyLoc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
+ CanReduceSize = true;
+ } else {
+ MemDepResult DepInfo = MD->getPointerDependencyFrom(
+ MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
+ if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+ CanReduceSize = true;
+ }
+
+ if (!CanReduceSize)
+ return false;
+ CopySize = MemSetSize;
+ }
}
IRBuilder<> Builder(MemCpy);
if (processMemSetMemCpyDependence(M, MDep))
return true;
- // The optimizations after this point require the memcpy size.
- ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
- if (!CopySize) return false;
-
MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
AnyClobber, MemoryLocation::getForSource(M));
// d) memcpy from a just-memset'd source can be turned into memset.
if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
if (Instruction *MI = MD->getMemoryInst()) {
- if (auto *C = dyn_cast<CallInst>(MI)) {
- // The memcpy must post-dom the call. Limit to the same block for now.
- // Additionally, we need to ensure that there are no accesses to dest
- // between the call and the memcpy. Accesses to src will be checked
- // by performCallSlotOptzn().
- // TODO: Support non-local call-slot optimization?
- if (C->getParent() == M->getParent() &&
- !accessedBetween(*AA, DestLoc, MD, MA)) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), Alignment, C)) {
- LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
- << " call: " << *C << "\n"
- << " memcpy: " << *M << "\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
+ if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+ if (auto *C = dyn_cast<CallInst>(MI)) {
+ // The memcpy must post-dom the call. Limit to the same block for
+ // now. Additionally, we need to ensure that there are no accesses
+ // to dest between the call and the memcpy. Accesses to src will be
+ // checked by performCallSlotOptzn().
+ // TODO: Support non-local call-slot optimization?
+ if (C->getParent() == M->getParent() &&
+ !accessedBetween(*AA, DestLoc, MD, MA)) {
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), Alignment,
+ C)) {
+ LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+ << " call: " << *C << "\n"
+ << " memcpy: " << *M << "\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
}
}
}
}
}
- if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) {
+ if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, M->getLength())) {
LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
eraseInstruction(M);
++NumMemCpyInstr;
if (processMemSetMemCpyDependence(M, MDep))
return true;
- // The optimizations after this point require the memcpy size.
- ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
- if (!CopySize) return false;
-
// There are four possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
// its lifetime copies undefined data, and we can therefore eliminate
// the memcpy in favor of the data that was already at the destination.
// d) memcpy from a just-memset'd source can be turned into memset.
- if (DepInfo.isClobber()) {
- if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), Alignment, C)) {
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
+ if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+ if (DepInfo.isClobber()) {
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), Alignment, C)) {
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
}
}
}
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
return processMemCpyMemCpyDependence(M, MDep);
} else if (SrcDepInfo.isDef()) {
- if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
+ if (hasUndefContents(SrcDepInfo.getInst(), M->getLength())) {
eraseInstruction(M);
++NumMemCpyInstr;
return true;
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @test(i8* %src, i64 %size) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE:%.*]], align 1
+; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE]], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP]], i8* align 8 [[SRC:%.*]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST]], i8* align 8 [[SRC]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: ret void
+;
+ %tmp = alloca i8, i64 %size
+ %dst = alloca i8, i64 %size
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 %size, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 %size, i1 false)
+
+ ret void
+}
+
+; Differing sizes, so left as it is.
+define void @negative_test(i8* %src, i64 %size1, i64 %size2) {
+; CHECK-LABEL: @negative_test(
+; CHECK-NEXT: [[TMP:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
+; CHECK-NEXT: [[DST:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP]], i8* align 8 [[SRC:%.*]], i64 [[SIZE1]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST]], i8* align 8 [[TMP]], i64 [[SIZE2]], i1 false)
+; CHECK-NEXT: ret void
+;
+ %tmp = alloca i8, i64 %size1
+ %dst = alloca i8, i64 %size2
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp, i8* align 8 %src, i64 %size1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst, i8* align 8 %tmp, i64 %size2, i1 false)
+
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @test(i8* %src, i8 %c, i64 %size) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[DST1:%.*]] = alloca i8, i64 [[SIZE:%.*]], align 1
+; CHECK-NEXT: [[DST2:%.*]] = alloca i8, i64 [[SIZE]], align 1
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[DST1]], i8 [[C:%.*]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[DST2]], i8 [[C]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: ret void
+;
+ %dst1 = alloca i8, i64 %size
+ %dst2 = alloca i8, i64 %size
+ call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 %size, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 %size, i1 false)
+
+ ret void
+}
+
+; Differing sizes, so left as it is.
+define void @negative_test(i8* %src, i8 %c, i64 %size1, i64 %size2) {
+; CHECK-LABEL: @negative_test(
+; CHECK-NEXT: [[DST1:%.*]] = alloca i8, i64 [[SIZE1:%.*]], align 1
+; CHECK-NEXT: [[DST2:%.*]] = alloca i8, i64 [[SIZE2:%.*]], align 1
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[DST1]], i8 [[C:%.*]], i64 [[SIZE1]], i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST2]], i8* align 8 [[DST1]], i64 [[SIZE2]], i1 false)
+; CHECK-NEXT: ret void
+;
+ %dst1 = alloca i8, i64 %size1
+ %dst2 = alloca i8, i64 %size2
+ call void @llvm.memset.p0i8.i64(i8* align 8 %dst1, i8 %c, i64 %size1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %dst2, i8* align 8 %dst1, i64 %size2, i1 false)
+
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)