From 0e0e2d5d2640c629741cab88ae3d9736f6900bb4 Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Wed, 20 Jul 2016 23:50:32 +0000 Subject: [PATCH] [OptDiag,LV] Add hotness attribute to the derived analysis remarks This includes FPCommute and Aliasing. Testcase is based on no_fpmath.ll. llvm-svn: 276211 --- .../llvm/Analysis/OptimizationDiagnosticInfo.h | 30 ++++++ llvm/include/llvm/IR/DiagnosticInfo.h | 32 +++--- llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp | 20 ++++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 18 ++-- .../LoopVectorize/X86/no_fpmath_with_hotness.ll | 113 +++++++++++++++++++++ 5 files changed, 189 insertions(+), 24 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll diff --git a/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h b/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h index d196774..dfa6177 100644 --- a/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h +++ b/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h @@ -74,6 +74,36 @@ public: void emitOptimizationRemarkAnalysis(const char *PassName, Loop *L, const Twine &Msg); + /// \brief Emit an optimization analysis remark related to floating-point + /// non-commutativity. + /// + /// \p PassName is the name of the pass emitting the message. If + /// -Rpass-analysis= is given and \p PassName matches the regular expression + /// in -Rpass, then the remark will be emitted. \p Fn is the function + /// triggering the remark, \p DLoc is the debug location where the diagnostic + /// is generated. \p V is the IR Value that identifies the code region. \p Msg + /// is the message string to use. + void emitOptimizationRemarkAnalysisFPCommute(const char *PassName, + const DebugLoc &DLoc, Value *V, + const Twine &Msg); + + /// \brief Emit an optimization analysis remark related to pointer aliasing. + /// + /// \p PassName is the name of the pass emitting the message. 
If + /// -Rpass-analysis= is given and \p PassName matches the regular expression + /// in -Rpass, then the remark will be emitted. \p Fn is the function + /// triggering the remark, \p DLoc is the debug location where the diagnostic + /// is generated. \p V is the IR Value that identifies the code region. \p Msg + /// is the message string to use. + void emitOptimizationRemarkAnalysisAliasing(const char *PassName, + const DebugLoc &DLoc, Value *V, + const Twine &Msg); + + /// \brief Same as above but derives the IR Value for the code region and the + /// debug location from the Loop parameter \p L. + void emitOptimizationRemarkAnalysisAliasing(const char *PassName, Loop *L, + const Twine &Msg); + private: Function *F; diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 1a7232a..b8667fc 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -504,13 +504,11 @@ public: bool shouldAlwaysPrint() const { return getPassName() == AlwaysPrint; } protected: - DiagnosticInfoOptimizationRemarkAnalysis(enum DiagnosticKind Kind, - const char *PassName, - const Function &Fn, - const DebugLoc &DLoc, - const Twine &Msg) - : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, Fn, DLoc, - Msg) {} + DiagnosticInfoOptimizationRemarkAnalysis( + enum DiagnosticKind Kind, const char *PassName, const Function &Fn, + const DebugLoc &DLoc, const Twine &Msg, Optional<uint64_t> Hotness) + : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, Fn, DLoc, Msg, + Hotness) {} }; /// Diagnostic information for optimization analysis remarks related to @@ -528,12 +526,12 @@ public: /// floating-point non-commutativity. Note that this class does not copy this /// message, so this reference must be valid for the whole life time of the /// diagnostic. 
- DiagnosticInfoOptimizationRemarkAnalysisFPCommute(const char *PassName, - const Function &Fn, - const DebugLoc &DLoc, - const Twine &Msg) + DiagnosticInfoOptimizationRemarkAnalysisFPCommute( + const char *PassName, const Function &Fn, const DebugLoc &DLoc, + const Twine &Msg, Optional<uint64_t> Hotness = None) : DiagnosticInfoOptimizationRemarkAnalysis( - DK_OptimizationRemarkAnalysisFPCommute, PassName, Fn, DLoc, Msg) {} + DK_OptimizationRemarkAnalysisFPCommute, PassName, Fn, DLoc, Msg, + Hotness) {} static bool classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_OptimizationRemarkAnalysisFPCommute; @@ -555,12 +553,12 @@ public: /// pointer aliasing legality. Note that this class does not copy this /// message, so this reference must be valid for the whole life time of the /// diagnostic. - DiagnosticInfoOptimizationRemarkAnalysisAliasing(const char *PassName, - const Function &Fn, - const DebugLoc &DLoc, - const Twine &Msg) + DiagnosticInfoOptimizationRemarkAnalysisAliasing( + const char *PassName, const Function &Fn, const DebugLoc &DLoc, + const Twine &Msg, Optional<uint64_t> Hotness = None) : DiagnosticInfoOptimizationRemarkAnalysis( - DK_OptimizationRemarkAnalysisAliasing, PassName, Fn, DLoc, Msg) {} + DK_OptimizationRemarkAnalysisAliasing, PassName, Fn, DLoc, Msg, + Hotness) {} static bool classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_OptimizationRemarkAnalysisAliasing; diff --git a/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp b/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp index 0e65b59..cae2253 100644 --- a/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp +++ b/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp @@ -54,6 +54,26 @@ void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis( Msg); } +void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisFPCommute( + const char *PassName, const DebugLoc &DLoc, Value *V, const Twine &Msg) { + LLVMContext &Ctx = F->getContext(); + 
Ctx.diagnose(DiagnosticInfoOptimizationRemarkAnalysisFPCommute( + PassName, *F, DLoc, Msg, computeHotness(V))); +} + +void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing( + const char *PassName, const DebugLoc &DLoc, Value *V, const Twine &Msg) { + LLVMContext &Ctx = F->getContext(); + Ctx.diagnose(DiagnosticInfoOptimizationRemarkAnalysisAliasing( + PassName, *F, DLoc, Msg, computeHotness(V))); +} + +void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing( + const char *PassName, Loop *L, const Twine &Msg) { + emitOptimizationRemarkAnalysisAliasing(PassName, L->getStartLoc(), + L->getHeader(), Msg); +} + OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass() : FunctionPass(ID) { initializeOptimizationRemarkEmitterWrapperPassPass( diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9d90430..5befef9 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1747,8 +1747,8 @@ public: /// followed by a non-expert user. class LoopVectorizationRequirements { public: - LoopVectorizationRequirements() - : NumRuntimePointerChecks(0), UnsafeAlgebraInst(nullptr) {} + LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) + : NumRuntimePointerChecks(0), UnsafeAlgebraInst(nullptr), ORE(ORE) {} void addUnsafeAlgebraInst(Instruction *I) { // First unsafe algebra instruction. 
@@ -1762,8 +1762,9 @@ public: const char *Name = Hints.vectorizeAnalysisPassName(); bool Failed = false; if (UnsafeAlgebraInst && !Hints.allowReordering()) { - emitOptimizationRemarkAnalysisFPCommute( - F->getContext(), Name, *F, UnsafeAlgebraInst->getDebugLoc(), + ORE.emitOptimizationRemarkAnalysisFPCommute( + Name, UnsafeAlgebraInst->getDebugLoc(), + UnsafeAlgebraInst->getParent(), VectorizationReport() << "cannot prove it is safe to reorder " "floating-point operations"); Failed = true; @@ -1776,8 +1777,8 @@ public: NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; if ((ThresholdReached && !Hints.allowReordering()) || PragmaThresholdReached) { - emitOptimizationRemarkAnalysisAliasing( - F->getContext(), Name, *F, L->getStartLoc(), + ORE.emitOptimizationRemarkAnalysisAliasing( + Name, L, VectorizationReport() << "cannot prove it is safe to reorder memory operations"); DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); @@ -1790,6 +1791,9 @@ public: private: unsigned NumRuntimePointerChecks; Instruction *UnsafeAlgebraInst; + + /// Interface to emit optimization remarks. + OptimizationRemarkEmitter &ORE; }; static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) { @@ -6436,7 +6440,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { PredicatedScalarEvolution PSE(*SE, *L); // Check if it is legal to vectorize the loop. 
- LoopVectorizationRequirements Requirements; + LoopVectorizationRequirements Requirements(*ORE); LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI, ORE, &Requirements, &Hints); if (!LVL.canVectorize()) { diff --git a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll new file mode 100644 index 0000000..7cb38f6 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll @@ -0,0 +1,113 @@ +; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-with-hotness 2>&1 | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-with-hotness 2>&1 | FileCheck %s + +; CHECK: remark: no_fpmath.c:6:11: loop not vectorized: cannot prove it is safe to reorder floating-point operations (hotness: 300) +; CHECK: remark: no_fpmath.c:6:14: loop not vectorized: +; CHECK: remark: no_fpmath.c:17:14: vectorized loop (vectorization width: 2, interleaved count: 2) + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +; Function Attrs: nounwind readonly ssp uwtable +define double @cond_sum(i32* nocapture readonly %v, i32 %n) #0 !dbg !4 !prof !29 { +entry: + %cmp.7 = icmp sgt i32 %n, 0, !dbg !3 + br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !8, !prof !30 + +for.body.preheader: ; preds = %entry + br label %for.body, !dbg !9 + +for.cond.cleanup.loopexit: ; preds = %for.body + %add.lcssa = phi double [ %add, %for.body ] + br label %for.cond.cleanup, !dbg !10 + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + %a.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ] + ret 
double %a.0.lcssa, !dbg !10 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %a.08 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %v, i64 %indvars.iv, !dbg !9 + %0 = load i32, i32* %arrayidx, align 4, !dbg !9, !tbaa !11 + %cmp1 = icmp eq i32 %0, 0, !dbg !15 + %cond = select i1 %cmp1, double 3.400000e+00, double 1.150000e+00, !dbg !9 + %add = fadd double %a.08, %cond, !dbg !16 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !8 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !8 + %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !8 + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8, !llvm.loop !17, !prof !31 +} + +; Function Attrs: nounwind readonly ssp uwtable +define double @cond_sum_loop_hint(i32* nocapture readonly %v, i32 %n) #0 !dbg !20 { +entry: + %cmp.7 = icmp sgt i32 %n, 0, !dbg !19 + br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !21 + +for.body.preheader: ; preds = %entry + br label %for.body, !dbg !22 + +for.cond.cleanup.loopexit: ; preds = %for.body + %add.lcssa = phi double [ %add, %for.body ] + br label %for.cond.cleanup, !dbg !23 + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + %a.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ] + ret double %a.0.lcssa, !dbg !23 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %a.08 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %v, i64 %indvars.iv, !dbg !22 + %0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !11 + %cmp1 = icmp eq i32 %0, 0, !dbg !24 + %cond = select i1 %cmp1, double 3.400000e+00, double 1.150000e+00, !dbg !22 + %add = fadd double %a.08, %cond, !dbg 
!25 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !21 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21 + %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !21 + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !21, !llvm.loop !26 +} + +attributes #0 = { nounwind } + +!llvm.dbg.cu = !{!28} +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{i32 1, !"PIC Level", i32 2} +!2 = !{!"clang version 3.7.0"} +!3 = !DILocation(line: 5, column: 20, scope: !4) +!4 = distinct !DISubprogram(name: "cond_sum", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !28, variables: !7) +!5 = !DIFile(filename: "no_fpmath.c", directory: "") +!6 = !DISubroutineType(types: !7) +!7 = !{} +!8 = !DILocation(line: 5, column: 3, scope: !4) +!9 = !DILocation(line: 6, column: 14, scope: !4) +!10 = !DILocation(line: 9, column: 3, scope: !4) +!11 = !{!12, !12, i64 0} +!12 = !{!"int", !13, i64 0} +!13 = !{!"omnipotent char", !14, i64 0} +!14 = !{!"Simple C/C++ TBAA"} +!15 = !DILocation(line: 6, column: 19, scope: !4) +!16 = !DILocation(line: 6, column: 11, scope: !4) +!17 = distinct !{!17, !18} +!18 = !{!"llvm.loop.unroll.disable"} +!19 = !DILocation(line: 16, column: 20, scope: !20) +!20 = distinct !DISubprogram(name: "cond_sum_loop_hint", scope: !5, file: !5, line: 12, type: !6, isLocal: false, isDefinition: true, scopeLine: 12, flags: DIFlagPrototyped, isOptimized: true, unit: !28, variables: !7) +!21 = !DILocation(line: 16, column: 3, scope: !20) +!22 = !DILocation(line: 17, column: 14, scope: !20) +!23 = !DILocation(line: 20, column: 3, scope: !20) +!24 = !DILocation(line: 17, column: 19, scope: !20) +!25 = !DILocation(line: 17, column: 11, scope: !20) +!26 = distinct !{!26, !27, !18} +!27 = !{!"llvm.loop.vectorize.enable", i1 true} +!28 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", + 
file: !5, + isOptimized: true, flags: "-O2", + splitDebugFilename: "abc.debug", emissionKind: 2) +!29 = !{!"function_entry_count", i64 3} +!30 = !{!"branch_weights", i32 99, i32 1} +!31 = !{!"branch_weights", i32 1, i32 99} -- 2.7.4