From efce19c3b0920944e2223a8dbc363432fe39e077 Mon Sep 17 00:00:00 2001 From: Tres Popp Date: Wed, 28 Apr 2021 13:15:46 +0200 Subject: [PATCH] Revert "[loop-idiom] Hoist loop memcpys to loop preheader" This reverts commit 75d6b8bb4056d518d06b72e6411ce3749455e2e3. The reasoning is mentioned in https://reviews.llvm.org/D97667 --- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 148 ++-------- .../LoopIdiom/memcpy-debugify-remarks.ll | 2 +- .../LoopIdiom/memcpy-intrinsic-different-types.ll | 89 ------ llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll | 309 --------------------- .../LoopIdiom/memset-debugify-remarks.ll | 2 +- 5 files changed, 24 insertions(+), 526 deletions(-) delete mode 100644 llvm/test/Transforms/LoopIdiom/memcpy-intrinsic-different-types.ll delete mode 100644 llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 46077b0..c9bce17 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -205,13 +205,6 @@ private: enum class ForMemset { No, Yes }; bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount, ForMemset For); - - template <typename MemInst> - bool processLoopMemIntrinsic( - BasicBlock *BB, - bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *), - const SCEV *BECount); - bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount); bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, @@ -642,10 +635,22 @@ bool LoopIdiomRecognize::runOnLoopBlock( for (auto &SI : StoreRefsForMemcpy) MadeChange |= processLoopStoreOfLoopLoad(SI, BECount); - MadeChange |= processLoopMemIntrinsic<MemCpyInst>( - BB, &LoopIdiomRecognize::processLoopMemCpy, BECount); - MadeChange |= processLoopMemIntrinsic<MemSetInst>( - BB, &LoopIdiomRecognize::processLoopMemSet, BECount); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != 
E;) { + Instruction *Inst = &*I++; + // Look for memset instructions, which may be optimized to a larger memset. + if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { + WeakTrackingVH InstPtr(&*I); + if (!processLoopMemSet(MSI, BECount)) + continue; + MadeChange = true; + + // If processing the memset invalidated our iterator, start over from the + // top of the block. + if (!InstPtr) + I = BB->begin(); + continue; + } + } return MadeChange; } @@ -794,86 +799,6 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL, return Changed; } -/// processLoopMemIntrinsic - Template function for calling different processor -/// functions based on mem instrinsic type. -template <typename MemInst> -bool LoopIdiomRecognize::processLoopMemIntrinsic( - BasicBlock *BB, - bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *), - const SCEV *BECount) { - bool MadeChange = false; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { - Instruction *Inst = &*I++; - // Look for memory instructions, which may be optimized to a larger one. - if (MemInst *MI = dyn_cast<MemInst>(Inst)) { - WeakTrackingVH InstPtr(&*I); - if (!(this->*Processor)(MI, BECount)) - continue; - MadeChange = true; - - // If processing the instruction invalidated our iterator, start over from - // the top of the block. - if (!InstPtr) - I = BB->begin(); - } - } - return MadeChange; -} - -/// processLoopMemCpy - See if this memcpy can be promoted to a large memcpy -bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI, - const SCEV *BECount) { - // We can only handle non-volatile memcpys with a constant size. - if (MCI->isVolatile() || !isa<ConstantInt>(MCI->getLength())) - return false; - - // If we're not allowed to hack on memcpy, we fail. 
- if (!HasMemcpy || DisableLIRP::Memcpy) - return false; - - Value *Dest = MCI->getDest(); - Value *Source = MCI->getSource(); - if (!Dest || !Source) - return false; - - // See if the load and store pointer expressions are AddRec like {base,+,1} on - // the current loop, which indicates a strided load and store. If we have - // something else, it's a random load or store we can't handle. - const SCEVAddRecExpr *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Dest)); - if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) - return false; - const SCEVAddRecExpr *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Source)); - if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) - return false; - - // Reject memcpys that are so large that they overflow an unsigned. - uint64_t SizeInBytes = cast<ConstantInt>(MCI->getLength())->getZExtValue(); - if ((SizeInBytes >> 32) != 0) - return false; - - // Check if the stride matches the size of the memcpy. If so, then we know - // that every byte is touched in the loop. - const SCEVConstant *StrStride = - dyn_cast<SCEVConstant>(StoreEv->getOperand(1)); - const SCEVConstant *LoadStride = - dyn_cast<SCEVConstant>(LoadEv->getOperand(1)); - if (!StrStride || !LoadStride) - return false; - - APInt StrIntStride = StrStride->getAPInt(); - APInt LoadIntStride = LoadStride->getAPInt(); - if (SizeInBytes != StrIntStride && SizeInBytes != -StrIntStride) - return false; - - // Check if the load stride matches the store stride. - if (StrIntStride != LoadIntStride && StrIntStride != -LoadIntStride) - return false; - - return processLoopStoreOfLoopLoad(Dest, Source, (unsigned)SizeInBytes, - MCI->getDestAlign(), MCI->getSourceAlign(), - MCI, MCI, StoreEv, LoadEv, BECount); -} - /// processLoopMemSet - See if this memset can be promoted to a large memset. bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { @@ -882,7 +807,7 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, return false; // If we're not allowed to hack on memset, we fail. 
- if (!HasMemset || DisableLIRP::Memset) + if (!HasMemset) return false; Value *Pointer = MSI->getDest(); @@ -1122,11 +1047,9 @@ bool LoopIdiomRecognize::processLoopStridedStore( ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore", NewCall->getDebugLoc(), Preheader) - << "Transformed loop-strided store in " - << ore::NV("Function", TheStore->getFunction()) - << " function into a call to " + << "Transformed loop-strided store into a call to " << ore::NV("NewFunction", NewCall->getCalledFunction()) - << "() intrinsic"; + << "() function"; }); // Okay, the memset has been formed. Zap the original store and anything that @@ -1214,22 +1137,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( SmallPtrSet<Instruction *, 1> Stores; Stores.insert(TheStore); - - bool IsMemCpy = isa<MemCpyInst>(TheStore); - const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store"; - if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, - StoreSize, *AA, Stores)) { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore", - TheStore) - << ore::NV("Inst", InstRemark) << " in " - << ore::NV("Function", TheStore->getFunction()) - << " function will not be hoisted: " - << ore::NV("Reason", "The loop may access store location"); - }); + StoreSize, *AA, Stores)) return Changed; - } const SCEV *LdStart = LoadEv->getStart(); unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace(); @@ -1243,21 +1153,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( Value *LoadBasePtr = Expander.expandCodeFor( LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator()); - // If the store is a memcpy instruction, we must check if it will write to - // the load memory locations. So remove it from the ignored stores. 
- if (IsMemCpy) - Stores.erase(TheStore); if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, - StoreSize, *AA, Stores)) { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad) - << ore::NV("Inst", InstRemark) << " in " - << ore::NV("Function", TheStore->getFunction()) - << " function will not be hoisted: " - << ore::NV("Reason", "The loop may access load location"); - }); + StoreSize, *AA, Stores)) return Changed; - } if (avoidLIRForMultiBlockLoop()) return Changed; @@ -1318,9 +1216,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( NewCall->getDebugLoc(), Preheader) << "Formed a call to " << ore::NV("NewFunction", NewCall->getCalledFunction()) - << "() intrinsic from " << ore::NV("Inst", InstRemark) - << " instruction in " << ore::NV("Function", TheStore->getFunction()) - << " function"; + << "() function"; }); // Okay, the memcpy has been formed. Zap the original store and anything that diff --git a/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll b/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll index 6f817f2..3578540 100644 --- a/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll +++ b/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll @@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu" ; Check that everything still works when debuginfo is present, and that it is reasonably propagated. 
-; CHECK: remark: <stdin>:6:1: Formed a call to llvm.memcpy.p0i8.p0i8.i64() intrinsic from load and store instruction in test6_dest_align function +; CHECK: remark: <stdin>:6:1: Formed a call to llvm.memcpy.p0i8.p0i8.i64() function define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp { ; CHECK-LABEL: @test6_dest_align( diff --git a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic-different-types.ll b/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic-different-types.ll deleted file mode 100644 index 20def75..0000000 --- a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic-different-types.ll +++ /dev/null @@ -1,89 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -loop-idiom < %s -S | FileCheck %s - -; #include <vector> -; -; class SDValue { -; int A; -; int B; -; unsigned C; -; }; -; -; class SDUse { -; SDValue Val; -; SDUse **Prev = nullptr; -; SDUse *Next = nullptr; -; -; public: -; operator const SDValue&() const { return Val; } -; }; -; -; void foo(SDUse *S, int N) { -; // Should not hoist memcpy because source and destination are of different types -; std::vector<SDValue> Ops(S, S + N); -; } - -; ModuleID = 'different_types.cpp' -source_filename = "different_types.cpp" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%class.SDUse = type { %class.SDValue, %class.SDUse**, %class.SDUse* } -%class.SDValue = type { i32, i32, i32 } - -declare dso_local i32 @__gxx_personality_v0(...) 
- -; Function Attrs: uwtable mustprogress -define linkonce_odr dso_local %class.SDValue* @_ZNSt20__uninitialized_copyILb0EE13__uninit_copyIP5SDUseP7SDValueEET0_T_S7_S6_(%class.SDUse* %__first, %class.SDUse* %__last, %class.SDValue* %__result) local_unnamed_addr #0 align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -; CHECK-LABEL: @_ZNSt20__uninitialized_copyILb0EE13__uninit_copyIP5SDUseP7SDValueEET0_T_S7_S6_( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP_NOT15:%.*]] = icmp eq %class.SDUse* [[__FIRST:%.*]], [[__LAST:%.*]] -; CHECK-NEXT: br i1 [[CMP_NOT15]], label [[FOR_END:%.*]], label [[FOR_INC_PREHEADER:%.*]] -; CHECK: for.inc.preheader: -; CHECK-NEXT: br label [[FOR_INC:%.*]] -; CHECK: for.inc: -; CHECK-NEXT: [[__CUR_017:%.*]] = phi %class.SDValue* [ [[INCDEC_PTR1:%.*]], [[FOR_INC]] ], [ [[__RESULT:%.*]], [[FOR_INC_PREHEADER]] ] -; CHECK-NEXT: [[__FIRST_ADDR_016:%.*]] = phi %class.SDUse* [ [[INCDEC_PTR:%.*]], [[FOR_INC]] ], [ [[__FIRST]], [[FOR_INC_PREHEADER]] ] -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %class.SDValue* [[__CUR_017]] to i8* -; CHECK-NEXT: [[TMP1:%.*]] = bitcast %class.SDUse* [[__FIRST_ADDR_016]] to i8* -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(12) [[TMP0]], i8* noundef nonnull align 8 dereferenceable(12) [[TMP1]], i64 12, i1 false) -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds [[CLASS_SDUSE:%.*]], %class.SDUse* [[__FIRST_ADDR_016]], i64 1 -; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds [[CLASS_SDVALUE:%.*]], %class.SDValue* [[__CUR_017]], i64 1 -; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq %class.SDUse* [[INCDEC_PTR]], [[__LAST]] -; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_INC]] -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[INCDEC_PTR1_LCSSA:%.*]] = phi %class.SDValue* [ [[INCDEC_PTR1]], [[FOR_INC]] ] -; CHECK-NEXT: br label [[FOR_END]] -; CHECK: for.end: -; CHECK-NEXT: [[__CUR_0_LCSSA:%.*]] = phi %class.SDValue* [ 
[[__RESULT]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR1_LCSSA]], [[FOR_END_LOOPEXIT]] ] -; CHECK-NEXT: ret %class.SDValue* [[__CUR_0_LCSSA]] -; -entry: - %cmp.not15 = icmp eq %class.SDUse* %__first, %__last - br i1 %cmp.not15, label %for.end, label %for.inc.preheader - -for.inc.preheader: ; preds = %entry - br label %for.inc - -for.inc: ; preds = %for.inc.preheader, %for.inc - %__cur.017 = phi %class.SDValue* [ %incdec.ptr1, %for.inc ], [ %__result, %for.inc.preheader ] - %__first.addr.016 = phi %class.SDUse* [ %incdec.ptr, %for.inc ], [ %__first, %for.inc.preheader ] - %0 = bitcast %class.SDValue* %__cur.017 to i8* - %1 = bitcast %class.SDUse* %__first.addr.016 to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(12) %0, i8* noundef nonnull align 8 dereferenceable(12) %1, i64 12, i1 false) - %incdec.ptr = getelementptr inbounds %class.SDUse, %class.SDUse* %__first.addr.016, i64 1 - %incdec.ptr1 = getelementptr inbounds %class.SDValue, %class.SDValue* %__cur.017, i64 1 - %cmp.not = icmp eq %class.SDUse* %incdec.ptr, %__last - br i1 %cmp.not, label %for.end.loopexit, label %for.inc - -for.end.loopexit: ; preds = %for.inc - %incdec.ptr1.lcssa = phi %class.SDValue* [ %incdec.ptr1, %for.inc ] - br label %for.end - -for.end: ; preds = %for.end.loopexit, %entry - %__cur.0.lcssa = phi %class.SDValue* [ %__result, %entry ], [ %incdec.ptr1.lcssa, %for.end.loopexit ] - ret %class.SDValue* %__cur.0.lcssa -} - -; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 diff --git a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll b/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll deleted file mode 100644 index bb0d68e..0000000 --- a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll +++ /dev/null @@ -1,309 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -loop-idiom < 
%s -S | FileCheck %s - -%struct.S = type { i32, i32, i8 } - -; unsigned copy_noalias(S* __restrict a, S *b, int n) { -; for (int i = 0; i < n; i++) { -; a[i] = b[i]; -; } -; return sizeof(a[0]); -; } - -; Function Attrs: nofree nounwind uwtable mustprogress -define dso_local i32 @copy_noalias(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr #0 { -; CHECK-LABEL: @copy_noalias( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[A1:%.*]] = bitcast %struct.S* [[A:%.*]] to i8* -; CHECK-NEXT: [[B2:%.*]] = bitcast %struct.S* [[B:%.*]] to i8* -; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i64 [[TMP0]], 12 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A1]], i8* align 4 [[B2]], i64 [[TMP1]], i1 false) -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret i32 12 -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_08]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[B]], i64 [[IDXPROM]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[A]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* -; CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[ARRAYIDX]] to i8* -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; -entry: - %cmp7 = icmp sgt i32 %n, 0 - br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup - 
-for.body.preheader: ; preds = %entry - br label %for.body - -for.cond.cleanup.loopexit: ; preds = %for.body - br label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry - ret i32 12 - -for.body: ; preds = %for.body.preheader, %for.body - %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %idxprom = zext i32 %i.08 to i64 - %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom - %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom - %0 = bitcast %struct.S* %arrayidx2 to i8* - %1 = bitcast %struct.S* %arrayidx to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %0, i8* nonnull align 4 dereferenceable(12) %1, i64 12, i1 false) - %inc = add nuw nsw i32 %i.08, 1 - %cmp = icmp slt i32 %inc, %n - br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit -} - -; unsigned copy_may_alias(S *a, S *b, int n) { -; for (int i = 0; i < n; i++) { -; a[i] = b[i]; -; } -; return sizeof(a[0]); -; } - -; Function Attrs: nofree nounwind uwtable mustprogress -define dso_local i32 @copy_may_alias(%struct.S* nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr #0 { -; CHECK-LABEL: @copy_may_alias( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret i32 12 -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_08]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[B:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* 
[[A:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* -; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.S* [[ARRAYIDX]] to i8* -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(12) [[TMP0]], i8* nonnull align 4 dereferenceable(12) [[TMP1]], i64 12, i1 false) -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; -entry: - %cmp7 = icmp sgt i32 %n, 0 - br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup - -for.body.preheader: ; preds = %entry - br label %for.body - -for.cond.cleanup.loopexit: ; preds = %for.body - br label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry - ret i32 12 - -for.body: ; preds = %for.body.preheader, %for.body - %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %idxprom = zext i32 %i.08 to i64 - %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom - %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom - %0 = bitcast %struct.S* %arrayidx2 to i8* - %1 = bitcast %struct.S* %arrayidx to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %0, i8* nonnull align 4 dereferenceable(12) %1, i64 12, i1 false) - %inc = add nuw nsw i32 %i.08, 1 - %cmp = icmp slt i32 %inc, %n - br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit -} - -%struct.R = type <{ i8, i32, i8 }> - -; void copy_noalias_read(S* __restrict x, S* __restrict y, int n, int &s) { -; for (int i = 0; i < n; i++) { -; x[i] = y[i]; -; s += y[i].b; -; } -; } - -; Function Attrs: nofree nounwind uwtable mustprogress -define dso_local void @copy_noalias_read(%struct.R* noalias nocapture %x, %struct.R* noalias nocapture readonly %y, i32 %n, i32* nocapture nonnull align 4 dereferenceable(4) %s) local_unnamed_addr #0 { -; 
CHECK-LABEL: @copy_noalias_read( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[X1:%.*]] = bitcast %struct.R* [[X:%.*]] to i8* -; CHECK-NEXT: [[Y2:%.*]] = bitcast %struct.R* [[Y:%.*]] to i8* -; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[S_PROMOTED:%.*]] = load i32, i32* [[S:%.*]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i64 [[TMP0]], 6 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[X1]], i8* align 1 [[Y2]], i64 [[TMP1]], i1 false) -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.for.cond.cleanup_crit_edge: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: store i32 [[ADD_LCSSA]], i32* [[S]], align 4 -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[ADD13:%.*]] = phi i32 [ [[S_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_012]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_R:%.*]], %struct.R* [[X]], i64 [[IDXPROM]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_R]], %struct.R* [[Y]], i64 [[IDXPROM]], i32 0 -; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_R]], %struct.R* [[Y]], i64 [[IDXPROM]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 1 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[ADD13]], [[TMP4]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_012]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE:%.*]] -; -entry: - %cmp11 = icmp sgt i32 %n, 0 - br i1 %cmp11, label %for.body.lr.ph, label 
%for.cond.cleanup - -for.body.lr.ph: ; preds = %entry - %s.promoted = load i32, i32* %s, align 4 - br label %for.body - -for.cond.for.cond.cleanup_crit_edge: ; preds = %for.body - %add.lcssa = phi i32 [ %add, %for.body ] - store i32 %add.lcssa, i32* %s, align 4 - br label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry - ret void - -for.body: ; preds = %for.body.lr.ph, %for.body - %add13 = phi i32 [ %s.promoted, %for.body.lr.ph ], [ %add, %for.body ] - %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] - %idxprom = zext i32 %i.012 to i64 - %0 = getelementptr inbounds %struct.R, %struct.R* %x, i64 %idxprom, i32 0 - %1 = getelementptr inbounds %struct.R, %struct.R* %y, i64 %idxprom, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(6) %0, i8* nonnull align 1 dereferenceable(6) %1, i64 6, i1 false) - %b = getelementptr inbounds %struct.R, %struct.R* %y, i64 %idxprom, i32 1 - %2 = load i32, i32* %b, align 1 - %add = add nsw i32 %add13, %2 - %inc = add nuw nsw i32 %i.012, 1 - %cmp = icmp slt i32 %inc, %n - br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge -} - -%struct.SPacked = type <{ i32, i32, i8 }> - -; Function Attrs: nofree nounwind uwtable mustprogress -define dso_local i32 @copy_noalias_packed(%struct.SPacked* noalias nocapture %a, %struct.SPacked* nocapture readonly %b, i32 %n) local_unnamed_addr #0 { -; CHECK-LABEL: @copy_noalias_packed( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[A1:%.*]] = bitcast %struct.SPacked* [[A:%.*]] to i8* -; CHECK-NEXT: [[B2:%.*]] = bitcast %struct.SPacked* [[B:%.*]] to i8* -; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i64 [[TMP0]], 9 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[A1]], 
i8* align 1 [[B2]], i64 [[TMP1]], i1 false) -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret i32 9 -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_08]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_SPACKED:%.*]], %struct.SPacked* [[B]], i64 [[IDXPROM]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_SPACKED]], %struct.SPacked* [[A]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.SPacked* [[ARRAYIDX2]] to i8* -; CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.SPacked* [[ARRAYIDX]] to i8* -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; -entry: - %cmp7 = icmp sgt i32 %n, 0 - br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup - -for.body.preheader: ; preds = %entry - br label %for.body - -for.cond.cleanup.loopexit: ; preds = %for.body - br label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry - ret i32 9 - -for.body: ; preds = %for.body.preheader, %for.body - %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %idxprom = zext i32 %i.08 to i64 - %arrayidx = getelementptr inbounds %struct.SPacked, %struct.SPacked* %b, i64 %idxprom - %arrayidx2 = getelementptr inbounds %struct.SPacked, %struct.SPacked* %a, i64 %idxprom - %0 = bitcast %struct.SPacked* %arrayidx2 to i8* - %1 = bitcast %struct.SPacked* %arrayidx to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(9) %0, i8* nonnull align 1 dereferenceable(9) %1, i64 9, i1 false) - %inc = add nuw nsw i32 %i.08, 1 - %cmp = icmp slt i32 %inc, %n - br i1 %cmp, label %for.body, label 
%for.cond.cleanup.loopexit -} - -%struct.SAligned = type { i32, i32, i8, [7 x i8] } - -define dso_local i32 @copy_noalias_aligned(%struct.SAligned* noalias nocapture %a, %struct.SAligned* nocapture readonly %b, i32 %n) local_unnamed_addr #0 { -; CHECK-LABEL: @copy_noalias_aligned( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[A1:%.*]] = bitcast %struct.SAligned* [[A:%.*]] to i8* -; CHECK-NEXT: [[B2:%.*]] = bitcast %struct.SAligned* [[B:%.*]] to i8* -; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A1]], i8* align 16 [[B2]], i64 [[TMP1]], i1 false) -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret i32 16 -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_08]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_SALIGNED:%.*]], %struct.SAligned* [[B]], i64 [[IDXPROM]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_SALIGNED]], %struct.SAligned* [[A]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.SAligned* [[ARRAYIDX2]] to i8* -; CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.SAligned* [[ARRAYIDX]] to i8* -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; -entry: - %cmp7 = icmp sgt i32 %n, 0 - br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup - -for.body.preheader: ; preds = %entry - br label %for.body - 
-for.cond.cleanup.loopexit: ; preds = %for.body - br label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry - ret i32 16 - -for.body: ; preds = %for.body.preheader, %for.body - %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] - %idxprom = zext i32 %i.08 to i64 - %arrayidx = getelementptr inbounds %struct.SAligned, %struct.SAligned* %b, i64 %idxprom - %arrayidx2 = getelementptr inbounds %struct.SAligned, %struct.SAligned* %a, i64 %idxprom - %0 = bitcast %struct.SAligned* %arrayidx2 to i8* - %1 = bitcast %struct.SAligned* %arrayidx to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %0, i8* nonnull align 16 dereferenceable(16) %1, i64 16, i1 false) - %inc = add nuw nsw i32 %i.08, 1 - %cmp = icmp slt i32 %inc, %n - br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit -} - -; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 diff --git a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll index b7a866f..06e17fe 100644 --- a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll +++ b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll @@ -11,7 +11,7 @@ target triple = "x86_64-unknown-linux-gnu" ; *begin = value; ; } -; CHECK: remark: <stdin>:4:1: Transformed loop-strided store in _Z15my_basic_memsetPcS_c function into a call to llvm.memset.p0i8.i64() intrinsic +; CHECK: remark: <stdin>:4:1: Transformed loop-strided store into a call to llvm.memset.p0i8.i64() function define void @_Z15my_basic_memsetPcS_c(i8* %ptr, i8* %end, i8 %value) { ; CHECK-LABEL: @_Z15my_basic_memsetPcS_c( -- 2.7.4