#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
const DataLayout *DL;
+ OptimizationRemarkEmitter &ORE;
bool ApplyCodeSizeHeuristics;
public:
LoopInfo *LI, ScalarEvolution *SE,
TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const DataLayout *DL)
- : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {}
+ const DataLayout *DL,
+ OptimizationRemarkEmitter &ORE)
+ : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}
bool runOnLoop(Loop *L);
*L->getHeader()->getParent());
const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
- LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL);
+ // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
+ // pass. Function analyses need to be preserved across loop transformations
+ // but ORE cannot be preserved (see comment before the pass definition).
+ OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
+
+ LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);
return LIR.runOnLoop(L);
}
LPMUpdater &) {
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
- LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL);
+ const auto &FAM =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
+ Function *F = L.getHeader()->getParent();
+
+ auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
+ // FIXME: This should probably be optional rather than required.
+ if (!ORE)
+ report_fatal_error(
+ "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
+ "at a higher level");
+
+ LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
+ *ORE);
if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();
<< "\n");
NewCall->setDebugLoc(TheStore->getDebugLoc());
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
+ NewCall->getDebugLoc(), Preheader)
+ << "Transformed loop-strided store into a call to "
+ << ore::NV("NewFunction", NewCall->getCalledFunction())
+ << "() function";
+ });
+
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
for (auto *I : Stores)
<< " from store ptr=" << *StoreEv << " at: " << *SI
<< "\n");
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
+ NewCall->getDebugLoc(), Preheader)
+ << "Formed a call to "
+ << ore::NV("NewFunction", NewCall->getCalledFunction())
+ << "() function";
+ });
+
// Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
deleteDeadInstruction(SI);
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -basicaa -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that the memcpy idiom is still recognized when debug info is present, that the debug info is reasonably propagated, and that an optimization remark is emitted.
+
+; CHECK: remark: <stdin>:6:1: Formed a call to llvm.memcpy.p0i8.p0i8.i64() function
+
+; Element-wise i32 copy loop from %Base (load align 1) to %Dest (store align 4).
+; The CHECK lines expect loop-idiom to emit an llvm.memcpy in bb.nph.
+define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
+; CHECK-LABEL: @test6_dest_align(
+; CHECK-NEXT: bb.nph:
+; CHECK-NEXT: [[DEST1:%.*]] = bitcast i32* [[DEST:%.*]] to i8*
+; CHECK-NEXT: [[BASE2:%.*]] = bitcast i32* [[BASE:%.*]] to i8*
+; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SIZE:%.*]], 2, !dbg !18
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST1]], i8* align 1 [[BASE2]], i64 [[TMP0]], i1 false), !dbg !19
+; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg !18
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], !dbg !20
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INDVAR]], metadata !9, metadata !DIExpression()), !dbg !20
+; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[INDVAR]], !dbg !21
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[I_0_014]], metadata !11, metadata !DIExpression()), !dbg !21
+; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i32, i32* [[DEST]], i64 [[INDVAR]], !dbg !22
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[DESTI]], metadata !12, metadata !DIExpression()), !dbg !22
+; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[I_0_014]], align 1, !dbg !23
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[V]], metadata !13, metadata !DIExpression()), !dbg !23
+; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1, !dbg !24
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INDVAR_NEXT]], metadata !15, metadata !DIExpression()), !dbg !24
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]], !dbg !25
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[EXITCOND]], metadata !16, metadata !DIExpression()), !dbg !25
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !dbg !26
+; CHECK: for.end:
+; CHECK-NEXT: ret void, !dbg !27
+;
+bb.nph:
+ br label %for.body
+
+for.body: ; preds = %bb.nph, %for.body
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
+ %DestI = getelementptr i32, i32* %Dest, i64 %indvar
+ ; The load/store pair below is the copy being recognized; in the expected
+ ; output above the store is gone while the load is still present.
+ %V = load i32, i32* %I.0.014, align 1
+ store i32 %V, i32* %DestI, align 4
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -basicaa -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that the memset idiom is still recognized when debug info is present, that the debug info is reasonably propagated, and that an optimization remark is emitted.
+
+; void my_basic_memset(char* begin, char* end, char value) {
+; for( ; begin != end; ++begin)
+; *begin = value;
+; }
+
+; CHECK: remark: <stdin>:4:1: Transformed loop-strided store into a call to llvm.memset.p0i8.i64() function
+
+; Byte-at-a-time store of %value from %ptr up to (not including) %end.
+; The CHECK lines expect loop-idiom to emit an llvm.memset in for.body.preheader.
+define void @_Z15my_basic_memsetPcS_c(i8* %ptr, i8* %end, i8 %value) {
+; CHECK-LABEL: @_Z15my_basic_memsetPcS_c(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTR1:%.*]] = ptrtoint i8* [[PTR:%.*]] to i64
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8* [[PTR]], [[END:%.*]], !dbg !15
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP3]], metadata !9, metadata !DIExpression()), !dbg !15
+; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]], !dbg !16
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[PTR1]], !dbg !17
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[END]], i64 [[TMP0]], !dbg !17
+; CHECK-NEXT: [[SCEVGEP2:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[PTR]], i8 [[VALUE:%.*]], i64 [[SCEVGEP2]], i1 false), !dbg !17
+; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg !17
+; CHECK: for.body:
+; CHECK-NEXT: [[PTR_ADDR_04:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR]], [[FOR_BODY_PREHEADER]] ], !dbg !18
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[PTR_ADDR_04]], metadata !11, metadata !DIExpression()), !dbg !18
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PTR_ADDR_04]], i64 1, !dbg !19
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[INCDEC_PTR]], metadata !13, metadata !DIExpression()), !dbg !19
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[INCDEC_PTR]], [[END]], !dbg !20
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]], metadata !14, metadata !DIExpression()), !dbg !20
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !dbg !21
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: br label [[FOR_END]], !dbg !22
+; CHECK: for.end:
+; CHECK-NEXT: ret void, !dbg !22
+;
+entry:
+ ; Skip the loop entirely when the range is empty.
+ %cmp3 = icmp eq i8* %ptr, %end
+ br i1 %cmp3, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %ptr.addr.04 = phi i8* [ %incdec.ptr, %for.body ], [ %ptr, %entry ]
+ ; This store is the loop-strided store being recognized; it does not appear
+ ; in the expected for.body output above.
+ store i8 %value, i8* %ptr.addr.04, align 1
+ %incdec.ptr = getelementptr inbounds i8, i8* %ptr.addr.04, i64 1
+ %cmp = icmp eq i8* %incdec.ptr, %end
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
; RUN: opt -loop-idiom < %s -S | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(loop-idiom)' < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(loop-idiom)' < %s -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"