From: Nikita Popov Date: Sat, 17 Oct 2020 13:54:52 +0000 (+0200) Subject: [MemCpyOpt] Move GEP during call slot optimization X-Git-Tag: llvmorg-13-init~8465 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3e37543111f40c3fa2430a80eb0293ae3b814dd3;p=platform%2Fupstream%2Fllvm.git [MemCpyOpt] Move GEP during call slot optimization When performing a call slot optimization to a GEP destination, it will currently usually fail, because the GEP is directly before the memcpy and as such does not dominate the call. We should move it above the call if that satisfies the domination requirement. I think that a constant-index GEP is the only useful thing to move here, as otherwise isDereferenceablePointer couldn't look through it anyway. As such I'm not trying to generalize this further. Differential Revision: https://reviews.llvm.org/D89623 --- diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index d8a2f55..a08a1a7 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -913,10 +913,15 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. - // TODO: Support moving instructions like GEPs upwards. - if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest)) - if (!DT->dominates(cpyDestInst, C)) + if (!DT->dominates(cpyDest, C)) { + // Support moving a constant index GEP before the call. 
+ auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest); + if (GEP && GEP->hasAllConstantIndices() && + DT->dominates(GEP->getPointerOperand(), C)) + GEP->moveBefore(C); + else return false; + } // In addition to knowing that the call does not access src in some // unexpected manner, for example via a global, which we deduce from diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll index 37a3cde..f85a676 100644 --- a/llvm/test/Transforms/MemCpyOpt/callslot.ll +++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -150,9 +150,10 @@ define void @dest_is_gep_requires_movement() { ; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i8], align 1 ; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [8 x i8]* [[SRC]] to i8* -; CHECK-NEXT: call void @accept_ptr(i8* [[SRC_I8]]) [[ATTR3]] ; CHECK-NEXT: [[DEST_I8:%.*]] = getelementptr [16 x i8], [16 x i8]* [[DEST]], i64 0, i64 8 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 8, i1 false) +; CHECK-NEXT: [[DEST_I81:%.*]] = bitcast i8* [[DEST_I8]] to [8 x i8]* +; CHECK-NEXT: [[DEST_I812:%.*]] = bitcast [8 x i8]* [[DEST_I81]] to i8* +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I812]]) [[ATTR3]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8]