[MachinePipeliner] Add ORE for MachinePipeliner

author Jinsong Ji <jji@us.ibm.com>

Tue, 5 May 2020 14:27:59 +0000 (14:27 +0000)

committer Jinsong Ji <jji@us.ibm.com>

Tue, 5 May 2020 16:04:53 +0000 (16:04 +0000)
author Jinsong Ji <jji@us.ibm.com>
Tue, 5 May 2020 14:27:59 +0000 (14:27 +0000)
committer Jinsong Ji <jji@us.ibm.com>
Tue, 5 May 2020 16:04:53 +0000 (16:04 +0000)
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h

index 49276fb..8b2c27e 100644 (file)
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -43,6 +43,7 @@
  #include "llvm/Analysis/AliasAnalysis.h"
  
  #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
  #include "llvm/CodeGen/RegisterClassInfo.h"
  #include "llvm/CodeGen/ScheduleDAGInstrs.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"
@@ -60,6 +61,7 @@ extern cl::opt<bool> SwpEnableCopyToPhi;
  class MachinePipeliner : public MachineFunctionPass {
  public:
    MachineFunction *MF = nullptr;
+  MachineOptimizationRemarkEmitter *ORE = nullptr;
    const MachineLoopInfo *MLI = nullptr;
    const MachineDominatorTree *MDT = nullptr;
    const InstrItineraryData *InstrItins;
@@ -96,6 +98,7 @@ public:
      AU.addRequired<MachineLoopInfo>();
      AU.addRequired<MachineDominatorTree>();
      AU.addRequired<LiveIntervals>();
+    AU.addRequired<MachineOptimizationRemarkEmitterPass>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
  
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp

index 3465aaa..ef4b02c 100644 (file)
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -217,6 +217,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
    MF = &mf;
    MLI = &getAnalysis<MachineLoopInfo>();
    MDT = &getAnalysis<MachineDominatorTree>();
+  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
    TII = MF->getSubtarget().getInstrInfo();
    RegClassInfo.runOnMachineFunction(*MF);
  
@@ -248,6 +249,12 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
    setPragmaPipelineOptions(L);
    if (!canPipelineLoop(L)) {
      LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
+    ORE->emit([&]() {
+      return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop",
+                                             L.getStartLoc(), L.getHeader())
+             << "Failed to pipeline loop";
+    });
+
      return Changed;
    }
  
@@ -309,11 +316,24 @@ void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
  /// restricted to loops with a single basic block.  Make sure that the
  /// branch in the loop can be analyzed.
  bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
-  if (L.getNumBlocks() != 1)
+  if (L.getNumBlocks() != 1) {
+    ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+                                               L.getStartLoc(), L.getHeader())
+             << "Not a single basic block: "
+             << ore::NV("NumBlocks", L.getNumBlocks());
+    });
      return false;
+  }
  
-  if (disabledByPragma)
+  if (disabledByPragma) {
+    ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+                                               L.getStartLoc(), L.getHeader())
+             << "Disabled by Pragma.";
+    });
      return false;
+  }
  
    // Check if the branch can't be understood because we can't do pipelining
    // if that's the case.
@@ -321,25 +341,37 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
    LI.FBB = nullptr;
    LI.BrCond.clear();
    if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
-    LLVM_DEBUG(
-        dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
+    LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
      NumFailBranch++;
+    ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+                                               L.getStartLoc(), L.getHeader())
+             << "The branch can't be understood";
+    });
      return false;
    }
  
    LI.LoopInductionVar = nullptr;
    LI.LoopCompare = nullptr;
    if (!TII->analyzeLoopForPipelining(L.getTopBlock())) {
-    LLVM_DEBUG(
-        dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
+    LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
      NumFailLoop++;
+    ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+                                               L.getStartLoc(), L.getHeader())
+             << "The loop structure is not supported";
+    });
      return false;
    }
  
    if (!L.getLoopPreheader()) {
-    LLVM_DEBUG(
-        dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
+    LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
      NumFailPreheader++;
+    ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+                                               L.getStartLoc(), L.getHeader())
+             << "No loop preheader found";
+    });
      return false;
    }
  
@@ -457,10 +489,13 @@ void SwingSchedulerDAG::schedule() {
  
    // Can't schedule a loop without a valid MII.
    if (MII == 0) {
-    LLVM_DEBUG(
-        dbgs()
-        << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
+    LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");
      NumFailZeroMII++;
+    Pass.ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(
+                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+             << "Invalid Minimal Initiation Interval: 0";
+    });
      return;
    }
  
@@ -469,6 +504,14 @@ void SwingSchedulerDAG::schedule() {
      LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
                        << ", we don't pipleline large loops\n");
      NumFailLargeMaxMII++;
+    Pass.ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(
+                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+             << "Minimal Initiation Interval too large: "
+             << ore::NV("MII", (int)MII) << " > "
+             << ore::NV("SwpMaxMii", SwpMaxMii)
+             << ". Refer to -pipeliner-max-mii.";
+    });
      return;
    }
  
@@ -511,15 +554,24 @@ void SwingSchedulerDAG::schedule() {
    if (!Scheduled){
      LLVM_DEBUG(dbgs() << "No schedule found, return\n");
      NumFailNoSchedule++;
+    Pass.ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(
+                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+             << "Unable to find schedule";
+    });
      return;
    }
  
    unsigned numStages = Schedule.getMaxStageCount();
    // No need to generate pipeline if there are no overlapped iterations.
    if (numStages == 0) {
-    LLVM_DEBUG(
-        dbgs() << "No overlapped iterations, no need to generate pipeline\n");
+    LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");
      NumFailZeroStage++;
+    Pass.ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(
+                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+             << "No need to pipeline - no overlapped iterations in schedule.";
+    });
      return;
    }
    // Check that the maximum stage count is less than user-defined limit.
@@ -527,9 +579,23 @@ void SwingSchedulerDAG::schedule() {
      LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
                        << " : too many stages, abort\n");
      NumFailLargeMaxStage++;
+    Pass.ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(
+                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+             << "Too many stages in schedule: "
+             << ore::NV("numStages", (int)numStages) << " > "
+             << ore::NV("SwpMaxStages", SwpMaxStages)
+             << ". Refer to -pipeliner-max-stages.";
+    });
      return;
    }
  
+  Pass.ORE->emit([&]() {
+    return MachineOptimizationRemark(DEBUG_TYPE, "schedule", Loop.getStartLoc(),
+                                     Loop.getHeader())
+           << "Pipelined succesfully!";
+  });
+
    // Generate the schedule as a ModuloSchedule.
    DenseMap<MachineInstr *, int> Cycles, Stages;
    std::vector<MachineInstr *> OrderedInsts;
@@ -1080,7 +1146,7 @@ unsigned SwingSchedulerDAG::calculateResMII() {
      }
    }
    int Resmii = Resources.size();
-  LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n");
+  LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
    // Delete the memory for each of the DFAs that were created earlier.
    for (ResourceManager *RI : Resources) {
      ResourceManager *D = RI;
@@ -2052,9 +2118,16 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
    LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
                      << ")\n");
  
-  if (scheduleFound)
+  if (scheduleFound) {
      Schedule.finalizeSchedule(this);
-  else
+    Pass.ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(
+                 DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
+             << "Schedule found with Initiation Interval: " << ore::NV("II", II)
+             << ", MaxStageCount: "
+             << ore::NV("MaxStageCount", Schedule.getMaxStageCount());
+    });
+  } else
      Schedule.reset();
  
    return scheduleFound && Schedule.getMaxStageCount() > 0;
diff --git a/llvm/test/CodeGen/PowerPC/sms-remark.ll b/llvm/test/CodeGen/PowerPC/sms-remark.ll

new file mode 100644 (file)

index 0000000..647b56f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/sms-remark.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:       -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr9 --ppc-enable-pipeliner \
+; RUN:       -pass-remarks-analysis=pipeliner -pass-remarks=pipeliner -o /dev/null 2>&1 \
+; RUN:       | FileCheck %s
+
+@x = dso_local local_unnamed_addr global <{ i32, i32, i32, i32, [1020 x i32] }> <{ i32 1, i32 2, i32 3, i32 4, [1020 x i32] zeroinitializer }>, align 4
+@y = dso_local global [1024 x i32] zeroinitializer, align 4
+
+define dso_local i32* @foo() local_unnamed_addr {
+;CHECK: Schedule found with Initiation Interval
+;CHECK: Pipelined succesfully!
+entry:
+  %.pre = load i32, i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @y, i64 0, i64 0), align 4
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32* getelementptr inbounds ([1024 x i32], [1024 x i32]* @y, i64 0, i64 0)
+
+for.body:                                         ; preds = %for.body, %entry
+  %0 = phi i32 [ %.pre, %entry ], [ %add.2, %for.body ]
+  %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next.2, %for.body ]
+  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* bitcast (<{ i32, i32, i32, i32, [1020 x i32] }>* @x to [1024 x i32]*), i64 0, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4
+  %mul = mul nsw i32 %1, %1
+  %add = add nsw i32 %mul, %0
+  %arrayidx6 = getelementptr inbounds [1024 x i32], [1024 x i32]* @y, i64 0, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx6, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %arrayidx2.1 = getelementptr inbounds [1024 x i32], [1024 x i32]* bitcast (<{ i32, i32, i32, i32, [1020 x i32] }>* @x to [1024 x i32]*), i64 0, i64 %indvars.iv.next
+  %2 = load i32, i32* %arrayidx2.1, align 4
+  %mul.1 = mul nsw i32 %2, %2
+  %add.1 = add nsw i32 %mul.1, %add
+  %arrayidx6.1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @y, i64 0, i64 %indvars.iv.next
+  store i32 %add.1, i32* %arrayidx6.1, align 4
+  %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2
+  %arrayidx2.2 = getelementptr inbounds [1024 x i32], [1024 x i32]* bitcast (<{ i32, i32, i32, i32, [1020 x i32] }>* @x to [1024 x i32]*), i64 0, i64 %indvars.iv.next.1
+  %3 = load i32, i32* %arrayidx2.2, align 4
+  %mul.2 = mul nsw i32 %3, %3
+  %add.2 = add nsw i32 %mul.2, %add.1
+  %arrayidx6.2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @y, i64 0, i64 %indvars.iv.next.1
+  store i32 %add.2, i32* %arrayidx6.2, align 4
+  %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv, 3
+  %exitcond.2 = icmp eq i64 %indvars.iv.next.2, 1024
+  br i1 %exitcond.2, label %for.cond.cleanup, label %for.body
+}
author	Jinsong Ji <jji@us.ibm.com>
	Tue, 5 May 2020 14:27:59 +0000 (14:27 +0000)
committer	Jinsong Ji <jji@us.ibm.com>
	Tue, 5 May 2020 16:04:53 +0000 (16:04 +0000)
llvm/include/llvm/CodeGen/MachinePipeliner.h		patch \| blob \| history
llvm/lib/CodeGen/MachinePipeliner.cpp		patch \| blob \| history
llvm/test/CodeGen/PowerPC/sms-remark.ll	[new file with mode: 0644]	patch \| blob