From 9e6e63fba2e63bc971565d4cb7f4cda30ab3366d Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Fri, 22 Jul 2016 22:53:17 +0000 Subject: [PATCH] [LoopDataPrefetch] Include hotness of region in opt remark llvm-svn: 276488 --- .../llvm/Analysis/OptimizationDiagnosticInfo.h | 24 +++++++ llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp | 10 +-- .../AArch64/opt-remark-with-hotness.ll | 82 ++++++++++++++++++++++ 3 files changed, 112 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark-with-hotness.ll diff --git a/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h b/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h index cee8bfb..41e80bf 100644 --- a/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h +++ b/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h @@ -58,6 +58,14 @@ public: /// debug location from the Loop parameter \p L. void emitOptimizationRemark(const char *PassName, Loop *L, const Twine &Msg); + /// \brief Same as above but derives the debug location and the code region + /// from the debug location and the basic block of \p Inst, respectively. + void emitOptimizationRemark(const char *PassName, Instruction *Inst, + const Twine &Msg) { + emitOptimizationRemark(PassName, Inst->getDebugLoc(), Inst->getParent(), + Msg); + } + /// Emit an optimization-missed message. /// /// \p PassName is the name of the pass emitting the message. If @@ -73,6 +81,14 @@ public: void emitOptimizationRemarkMissed(const char *PassName, Loop *L, const Twine &Msg); + /// \brief Same as above but derives the debug location and the code region + /// from the debug location and the basic block of \p Inst, respectively. + void emitOptimizationRemarkMissed(const char *PassName, Instruction *Inst, + const Twine &Msg) { + emitOptimizationRemarkMissed(PassName, Inst->getDebugLoc(), + Inst->getParent(), Msg); + } + /// Emit an optimization analysis remark message. 
/// /// \p PassName is the name of the pass emitting the message. If @@ -89,6 +105,14 @@ public: void emitOptimizationRemarkAnalysis(const char *PassName, Loop *L, const Twine &Msg); + /// \brief Same as above but derives the debug location and the code region + /// from the debug location and the basic block of \p Inst, respectively. + void emitOptimizationRemarkAnalysis(const char *PassName, Instruction *Inst, + const Twine &Msg) { + emitOptimizationRemarkAnalysis(PassName, Inst->getDebugLoc(), + Inst->getParent(), Msg); + } + /// \brief Emit an optimization analysis remark related to floating-point /// non-commutativity. /// diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp index e3ed628b..fb14b97 100644 --- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -25,7 +26,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" @@ -77,6 +77,7 @@ namespace { AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); + AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); AU.addRequired<ScalarEvolutionWrapperPass>(); // FIXME: For some reason, preserving SE here breaks LSR (even if // this pass changes nothing). 
@@ -116,6 +117,7 @@ namespace { ScalarEvolution *SE; const TargetTransformInfo *TTI; const DataLayout *DL; + OptimizationRemarkEmitter *ORE; }; } @@ -125,6 +127,7 @@ INITIALIZE_PASS_BEGIN(LoopDataPrefetch, "loop-data-prefetch", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(LoopDataPrefetch, "loop-data-prefetch", "Loop Data Prefetch", false, false) @@ -155,6 +158,7 @@ bool LoopDataPrefetch::runOnFunction(Function &F) { SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); DL = &F.getParent()->getDataLayout(); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); // If PrefetchDistance is not set, don't run the pass. This gives an @@ -291,9 +295,7 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) { ++NumPrefetches; DEBUG(dbgs() << " Access: " << *PtrValue << ", SCEV: " << *LSCEV << "\n"); - emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, - MemI->getDebugLoc(), "prefetched memory access"); - + ORE->emitOptimizationRemark(DEBUG_TYPE, MemI, "prefetched memory access"); MadeChange = true; } diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark-with-hotness.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark-with-hotness.ll new file mode 100644 index 0000000..d085fca --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark-with-hotness.ll @@ -0,0 +1,82 @@ +; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch \ +; RUN: -pass-remarks=loop-data-prefetch -S -max-prefetch-iters-ahead=100 \ +; RUN: -pass-remarks-with-hotness \ +; RUN: < %s 2>&1 | FileCheck %s + +; ModuleID = '/tmp/s.c' +source_filename = "/tmp/s.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios5.0.0" + +; 1 struct MyStruct { 
+; 2 int field; +; 3 char kk[2044]; +; 4 } *my_struct; +; 5 +; 6 int f(struct MyStruct *p, int N) { +; 7 int total = 0; +; 8 for (int i = 0; i < N; i++) { +; 9 total += my_struct[i].field; +; 10 } +; 11 return total; +; 12 } + +; CHECK: remark: /tmp/s.c:9:27: prefetched memory access (hotness: 600) + +%struct.MyStruct = type { i32, [2044 x i8] } + +@my_struct = common global %struct.MyStruct* null, align 8 + +define i32 @f(%struct.MyStruct* nocapture readnone %p, i32 %N) !dbg !6 !prof !21 { +entry: + %cmp6 = icmp sgt i32 %N, 0, !dbg !8 + br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup, !dbg !9, !prof !22 + +for.body.lr.ph: ; preds = %entry + %0 = load %struct.MyStruct*, %struct.MyStruct** @my_struct, align 8, !dbg !10, !tbaa !11 + br label %for.body, !dbg !9 + +for.cond.cleanup: ; preds = %for.body, %entry + %total.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %total.0.lcssa, !dbg !15 + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %total.07 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %field = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %0, i64 %indvars.iv, i32 0, !dbg !16 + %1 = load i32, i32* %field, align 4, !dbg !16, !tbaa !17 + %add = add nsw i32 %1, %total.07, !dbg !20 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !9 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !9 + %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !9 + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !9, !prof !23 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2) +!1 = !DIFile(filename: "/tmp/s.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"PIC Level", i32 2} +!5 = !{!"clang 
version 3.9.0"} +!6 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 6, type: !7, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 8, column: 21, scope: !6) +!9 = !DILocation(line: 8, column: 3, scope: !6) +!10 = !DILocation(line: 9, column: 14, scope: !6) +!11 = !{!12, !12, i64 0} +!12 = !{!"any pointer", !13, i64 0} +!13 = !{!"omnipotent char", !14, i64 0} +!14 = !{!"Simple C/C++ TBAA"} +!15 = !DILocation(line: 11, column: 3, scope: !6) +!16 = !DILocation(line: 9, column: 27, scope: !6) +!17 = !{!18, !19, i64 0} +!18 = !{!"MyStruct", !19, i64 0, !13, i64 4} +!19 = !{!"int", !13, i64 0} +!20 = !DILocation(line: 9, column: 11, scope: !6) +!21 = !{!"function_entry_count", i64 6} +!22 = !{!"branch_weights", i32 99, i32 1} +!23 = !{!"branch_weights", i32 1, i32 99} -- 2.7.4