From db63088ea7cc8890972e875c87550609e6bfaf4a Mon Sep 17 00:00:00 2001
From: Andrea Di Biagio
Date: Thu, 16 Aug 2018 19:00:48 +0000
Subject: [PATCH] [llvm-mca] Refactor how execution is orchestrated by the
 Pipeline.

This patch changes how instruction execution is orchestrated by the
Pipeline. In particular, it makes more explicit how instructions
transition through the various pipeline stages during execution.

The main goal is to simplify both the Stage API and the Pipeline
execution. At the same time, this patch fixes some design issues which
are currently latent, but which are likely to cause problems in the
future if people start defining custom pipelines.

The new design assumes that each pipeline stage knows its
"next-in-sequence". The Stage API has gained three new methods:
 - isAvailable(IR)
 - checkNextStage(IR)
 - moveToTheNextStage(IR)

An instruction IR can be executed by a Stage if method
`Stage::isAvailable(IR)` returns true. Instructions move to the next
stage via method moveToTheNextStage(IR). An instruction cannot be moved
to the next stage if method checkNextStage(IR) (called on the current
stage) returns false.

Stages are now responsible for moving instructions to the next stage in
sequence when necessary. Instructions are allowed to transition through
multiple stages during a single cycle, as long as those stages are
available and all the calls to `checkNextStage(IR)` return true.

Methods `Stage::preExecute()` and `Stage::postExecute()` have become
redundant, and this patch removes them.

Method Pipeline::runCycle() is now simpler, and it correctly visits
every stage at the beginning and at the end of each cycle.

Other changes:
 - DispatchStage no longer requires a reference to the Scheduler.
 - ExecuteStage no longer needs to interact directly with the
   RetireControlUnit. Instead, executed instructions are now moved
   directly to the next stage (i.e. the retire stage).
 - RetireStage gained an execute method. This allowed us to remove the
   dependency on the RCU in ExecuteStage.
 - FetchStage now updates the "program counter" during cycleStart()
   (i.e. before we start executing new instructions).
 - Method execute() no longer needs to return a Stage::Status; it has
   been dropped in favor of a more lightweight llvm::Error.

Overall, I measured a ~11% performance gain w.r.t. the previous design.
I also think that the Stage interface is probably easier to read now.
That being said, code comments still have to be improved, and I plan to
do that in a follow-up patch.
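
As an illustration only (not part of this patch), the sketch below shows
how a hypothetical custom stage could be written against the new API.
The class name and its one-entry buffer are made up; only the Stage
hooks and the checkNextStage()/moveToTheNextStage() helpers introduced
here are assumed to exist:

  // Hypothetical example: a stage that can hold at most one instruction
  // across cycles. Only the Stage hooks are real; the rest is illustrative.
  #include "Instruction.h"
  #include "Stage.h"
  #include "llvm/Support/Error.h"

  namespace mca {

  class BufferOneStage final : public Stage {
    InstRef Buffered;         // Instruction parked until the next stage frees up.
    bool HasBuffered = false;

  public:
    // The Pipeline (or the previous stage) only calls execute() when this
    // returns true.
    bool isAvailable(const InstRef & /* unused */) const override {
      return !HasBuffered;
    }

    // Report pending work so Pipeline::run() keeps cycling until we drain.
    bool hasWorkToComplete() const override { return HasBuffered; }

    llvm::Error cycleStart() override {
      // Try to drain the parked instruction at the beginning of each cycle.
      if (HasBuffered && checkNextStage(Buffered)) {
        HasBuffered = false;
        return moveToTheNextStage(Buffered);
      }
      return llvm::ErrorSuccess();
    }

    llvm::Error execute(InstRef &IR) override {
      // Forward IR within this same cycle if the next stage can accept it;
      // otherwise park it and retry on a later cycle.
      if (checkNextStage(IR))
        return moveToTheNextStage(IR);
      Buffered = IR;
      HasBuffered = true;
      return llvm::ErrorSuccess();
    }
  };

  } // namespace mca

The sketch relies on the convention used by this patch: the Pipeline only
drives the first stage directly, so a stage that cannot forward an
instruction right away is expected to buffer it internally and to report
it through hasWorkToComplete().
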
Differential revision: https://reviews.llvm.org/D50849 llvm-svn: 339923 --- llvm/tools/llvm-mca/Context.cpp | 22 ++++++------- llvm/tools/llvm-mca/DispatchStage.cpp | 32 ++++++++++-------- llvm/tools/llvm-mca/DispatchStage.h | 30 ++++++----------- llvm/tools/llvm-mca/ExecuteStage.cpp | 37 ++++++++++++++++----- llvm/tools/llvm-mca/ExecuteStage.h | 9 ++--- llvm/tools/llvm-mca/FetchStage.cpp | 46 +++++++++++++++++++++----- llvm/tools/llvm-mca/FetchStage.h | 12 +++++-- llvm/tools/llvm-mca/InstructionTables.cpp | 4 +-- llvm/tools/llvm-mca/InstructionTables.h | 6 ++-- llvm/tools/llvm-mca/Pipeline.cpp | 55 ++++++++++--------------------- llvm/tools/llvm-mca/Pipeline.h | 6 +--- llvm/tools/llvm-mca/RetireStage.cpp | 5 +++ llvm/tools/llvm-mca/RetireStage.h | 3 +- llvm/tools/llvm-mca/Stage.h | 53 ++++++++++++++--------------- 14 files changed, 171 insertions(+), 149 deletions(-) diff --git a/llvm/tools/llvm-mca/Context.cpp b/llvm/tools/llvm-mca/Context.cpp index 685714e..4fa584c 100644 --- a/llvm/tools/llvm-mca/Context.cpp +++ b/llvm/tools/llvm-mca/Context.cpp @@ -40,12 +40,12 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, SM, Opts.LoadQueueSize, Opts.StoreQueueSize, Opts.AssumeNoAlias); // Create the pipeline and its stages. - auto P = llvm::make_unique(); - auto F = llvm::make_unique(IB, SrcMgr); - auto D = llvm::make_unique( - STI, MRI, Opts.RegisterFileSize, Opts.DispatchWidth, *RCU, *PRF, *HWS); - auto R = llvm::make_unique(*RCU, *PRF); - auto E = llvm::make_unique(*RCU, *HWS); + auto StagePipeline = llvm::make_unique(); + auto Fetch = llvm::make_unique(IB, SrcMgr); + auto Dispatch = llvm::make_unique( + STI, MRI, Opts.RegisterFileSize, Opts.DispatchWidth, *RCU, *PRF); + auto Execute = llvm::make_unique(*HWS); + auto Retire = llvm::make_unique(*RCU, *PRF); // Add the hardware to the context. addHardwareUnit(std::move(RCU)); @@ -53,11 +53,11 @@ Context::createDefaultPipeline(const PipelineOptions &Opts, InstrBuilder &IB, addHardwareUnit(std::move(HWS)); // Build the pipeline. - P->appendStage(std::move(F)); - P->appendStage(std::move(D)); - P->appendStage(std::move(R)); - P->appendStage(std::move(E)); - return P; + StagePipeline->appendStage(std::move(Fetch)); + StagePipeline->appendStage(std::move(Dispatch)); + StagePipeline->appendStage(std::move(Execute)); + StagePipeline->appendStage(std::move(Retire)); + return StagePipeline; } } // namespace mca diff --git a/llvm/tools/llvm-mca/DispatchStage.cpp b/llvm/tools/llvm-mca/DispatchStage.cpp index 4f2dbee..87b8179 100644 --- a/llvm/tools/llvm-mca/DispatchStage.cpp +++ b/llvm/tools/llvm-mca/DispatchStage.cpp @@ -59,12 +59,8 @@ bool DispatchStage::checkRCU(const InstRef &IR) const { return false; } -bool DispatchStage::checkScheduler(const InstRef &IR) const { - HWStallEvent::GenericEventType Event; - const bool Ready = SC.canBeDispatched(IR, Event); - if (!Ready) - notifyEvent(HWStallEvent(Event, IR)); - return Ready; +bool DispatchStage::canDispatch(const InstRef &IR) const { + return checkRCU(IR) && checkPRF(IR) && checkNextStage(IR); } void DispatchStage::updateRAWDependencies(ReadState &RS, @@ -87,7 +83,7 @@ void DispatchStage::updateRAWDependencies(ReadState &RS, } } -void DispatchStage::dispatch(InstRef IR) { +llvm::Error DispatchStage::dispatch(InstRef IR) { assert(!CarryOver && "Cannot dispatch another instruction!"); Instruction &IS = *IR.getInstruction(); const InstrDesc &Desc = IS.getDesc(); @@ -127,8 +123,10 @@ void DispatchStage::dispatch(InstRef IR) { // dispatched to the schedulers for execution. 
IS.dispatch(RCU.reserveSlot(IR, NumMicroOps)); - // Notify listeners of the "instruction dispatched" event. + // Notify listeners of the "instruction dispatched" event, + // and move IR to the next stage. notifyInstructionDispatched(IR, RegisterFiles); + return std::move(moveToTheNextStage(IR)); } llvm::Error DispatchStage::cycleStart() { @@ -137,12 +135,20 @@ llvm::Error DispatchStage::cycleStart() { return llvm::ErrorSuccess(); } -Stage::Status DispatchStage::execute(InstRef &IR) { +bool DispatchStage::isAvailable(const InstRef &IR) const { const InstrDesc &Desc = IR.getInstruction()->getDesc(); - if (!isAvailable(Desc.NumMicroOps) || !canDispatch(IR)) - return Stage::Stop; - dispatch(IR); - return Stage::Continue; + unsigned Required = std::min(Desc.NumMicroOps, DispatchWidth); + if (Required > AvailableEntries) + return false; + // The dispatch logic doesn't internally buffer instructions. It only accepts + // instructions that can be successfully moved to the next stage during this + // same cycle. + return canDispatch(IR); +} + +llvm::Error DispatchStage::execute(InstRef &IR) { + assert(canDispatch(IR) && "Cannot dispatch another instruction!"); + return std::move(dispatch(IR)); } #ifndef NDEBUG diff --git a/llvm/tools/llvm-mca/DispatchStage.h b/llvm/tools/llvm-mca/DispatchStage.h index 2312f86..3419d7f 100644 --- a/llvm/tools/llvm-mca/DispatchStage.h +++ b/llvm/tools/llvm-mca/DispatchStage.h @@ -29,8 +29,6 @@ namespace mca { -class Scheduler; - // Implements the hardware dispatch logic. // // This class is responsible for the dispatch stage, in which instructions are @@ -56,26 +54,17 @@ class DispatchStage final : public Stage { const llvm::MCSubtargetInfo &STI; RetireControlUnit &RCU; RegisterFile &PRF; - Scheduler &SC; bool checkRCU(const InstRef &IR) const; bool checkPRF(const InstRef &IR) const; - bool checkScheduler(const InstRef &IR) const; - void dispatch(InstRef IR); + bool canDispatch(const InstRef &IR) const; + llvm::Error dispatch(InstRef IR); + void updateRAWDependencies(ReadState &RS, const llvm::MCSubtargetInfo &STI); void notifyInstructionDispatched(const InstRef &IR, llvm::ArrayRef UsedPhysRegs); - bool isAvailable(unsigned NumEntries) const { - return NumEntries <= AvailableEntries || AvailableEntries == DispatchWidth; - } - - bool canDispatch(const InstRef &IR) { - assert(isAvailable(IR.getInstruction()->getDesc().NumMicroOps)); - return checkRCU(IR) && checkPRF(IR) && checkScheduler(IR); - } - void collectWrites(llvm::SmallVectorImpl &Vec, unsigned RegID) const { return PRF.collectWrites(Vec, RegID); @@ -85,16 +74,17 @@ public: DispatchStage(const llvm::MCSubtargetInfo &Subtarget, const llvm::MCRegisterInfo &MRI, unsigned RegisterFileSize, unsigned MaxDispatchWidth, RetireControlUnit &R, - RegisterFile &F, Scheduler &Sched) + RegisterFile &F) : DispatchWidth(MaxDispatchWidth), AvailableEntries(MaxDispatchWidth), - CarryOver(0U), STI(Subtarget), RCU(R), PRF(F), SC(Sched) {} + CarryOver(0U), STI(Subtarget), RCU(R), PRF(F) {} + + bool isAvailable(const InstRef &IR) const override; - // We can always try to dispatch, so returning false is okay in this case. - // The retire stage, which controls the RCU, might have items to complete but - // RetireStage::hasWorkToComplete will check for that case. + // The dispatch logic internally doesn't buffer instructions. So there is + // never work to do at the beginning of every cycle. 
bool hasWorkToComplete() const override { return false; } llvm::Error cycleStart() override; - Status execute(InstRef &IR) override; + llvm::Error execute(InstRef &IR) override; void notifyDispatchStall(const InstRef &IR, unsigned EventType); #ifndef NDEBUG diff --git a/llvm/tools/llvm-mca/ExecuteStage.cpp b/llvm/tools/llvm-mca/ExecuteStage.cpp index f9abf9f..f3c441b 100644 --- a/llvm/tools/llvm-mca/ExecuteStage.cpp +++ b/llvm/tools/llvm-mca/ExecuteStage.cpp @@ -26,6 +26,14 @@ namespace mca { using namespace llvm; +bool ExecuteStage::isAvailable(const InstRef &IR) const { + HWStallEvent::GenericEventType Event; + if (HWS.canBeDispatched(IR, Event)) + return true; + notifyEvent(HWStallEvent(Event, IR)); + return false; +} + // Reclaim the simulated resources used by the scheduler. void ExecuteStage::reclaimSchedulerResources() { SmallVector ResourcesFreed; @@ -38,8 +46,12 @@ void ExecuteStage::reclaimSchedulerResources() { Error ExecuteStage::updateSchedulerQueues() { SmallVector InstructionIDs; HWS.updateIssuedSet(InstructionIDs); - for (const InstRef &IR : InstructionIDs) + for (InstRef &IR : InstructionIDs) { notifyInstructionExecuted(IR); + //FIXME: add a buffer of executed instructions. + if (Error S = moveToTheNextStage(IR)) + return S; + } InstructionIDs.clear(); HWS.updatePendingQueue(InstructionIDs); @@ -60,8 +72,12 @@ Error ExecuteStage::issueReadyInstructions() { const InstrDesc &Desc = IR.getInstruction()->getDesc(); notifyReleasedBuffers(Desc.Buffers); notifyInstructionIssued(IR, Used); - if (IR.getInstruction()->isExecuted()) + if (IR.getInstruction()->isExecuted()) { notifyInstructionExecuted(IR); + //FIXME: add a buffer of executed instructions. + if (Error S = moveToTheNextStage(IR)) + return S; + } // Instructions that have been issued during this cycle might have unblocked // other dependent instructions. Dependent instructions may be issued during @@ -100,7 +116,9 @@ Error ExecuteStage::cycleStart() { } // Schedule the instruction for execution on the hardware. -Stage::Status ExecuteStage::execute(InstRef &IR) { +Error ExecuteStage::execute(InstRef &IR) { + assert(isAvailable(IR) && "Scheduler is not available!"); + #ifndef NDEBUG // Ensure that the HWS has not stored this instruction in its queues. HWS.sanityCheck(IR); @@ -116,7 +134,7 @@ Stage::Status ExecuteStage::execute(InstRef &IR) { // Obtain a slot in the LSU. If we cannot reserve resources, return true, so // that succeeding stages can make progress. if (!HWS.reserveResources(IR)) - return Stage::Continue; + return ErrorSuccess(); // If we did not return early, then the scheduler is ready for execution. notifyInstructionReady(IR); @@ -137,7 +155,7 @@ Stage::Status ExecuteStage::execute(InstRef &IR) { // If we cannot issue immediately, the HWS will add IR to its ready queue for // execution later, so we must return early here. if (!HWS.issueImmediately(IR)) - return Stage::Continue; + return ErrorSuccess(); LLVM_DEBUG(dbgs() << "[SCHEDULER] Instruction #" << IR << " issued immediately\n"); @@ -149,10 +167,12 @@ Stage::Status ExecuteStage::execute(InstRef &IR) { // Perform notifications. notifyReleasedBuffers(Desc.Buffers); notifyInstructionIssued(IR, Used); - if (IR.getInstruction()->isExecuted()) + if (IR.getInstruction()->isExecuted()) { notifyInstructionExecuted(IR); - - return Stage::Continue; + //FIXME: add a buffer of executed instructions. 
+ return std::move(moveToTheNextStage(IR)); + } + return ErrorSuccess(); } void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) { @@ -160,7 +180,6 @@ void ExecuteStage::notifyInstructionExecuted(const InstRef &IR) { LLVM_DEBUG(dbgs() << "[E] Instruction Executed: #" << IR << '\n'); notifyEvent( HWInstructionEvent(HWInstructionEvent::Executed, IR)); - RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID()); } void ExecuteStage::notifyInstructionReady(const InstRef &IR) { diff --git a/llvm/tools/llvm-mca/ExecuteStage.h b/llvm/tools/llvm-mca/ExecuteStage.h index d22cca0..85938d6 100644 --- a/llvm/tools/llvm-mca/ExecuteStage.h +++ b/llvm/tools/llvm-mca/ExecuteStage.h @@ -19,7 +19,6 @@ #define LLVM_TOOLS_LLVM_MCA_EXECUTE_STAGE_H #include "Instruction.h" -#include "RetireControlUnit.h" #include "Scheduler.h" #include "Stage.h" #include "llvm/ADT/ArrayRef.h" @@ -27,8 +26,6 @@ namespace mca { class ExecuteStage final : public Stage { - // Owner will go away when we move listeners/eventing to the stages. - RetireControlUnit &RCU; Scheduler &HWS; // The following routines are used to maintain the HWS. @@ -40,7 +37,7 @@ class ExecuteStage final : public Stage { ExecuteStage &operator=(const ExecuteStage &Other) = delete; public: - ExecuteStage(RetireControlUnit &R, Scheduler &S) : Stage(), RCU(R), HWS(S) {} + ExecuteStage(Scheduler &S) : Stage(), HWS(S) {} // This stage works under the assumption that the Pipeline will eventually // execute a retire stage. We don't need to check if pipelines and/or @@ -49,9 +46,9 @@ public: // RetireControlUnit::hasWorkToComplete() is responsible for checking if there // are still instructions in-flight in the out-of-order backend. bool hasWorkToComplete() const override { return false; } - + bool isAvailable(const InstRef &IR) const override; llvm::Error cycleStart() override; - Status execute(InstRef &IR) override; + llvm::Error execute(InstRef &IR) override; void notifyInstructionIssued(const InstRef &IR, diff --git a/llvm/tools/llvm-mca/FetchStage.cpp b/llvm/tools/llvm-mca/FetchStage.cpp index 1a6b939..60c9e6f 100644 --- a/llvm/tools/llvm-mca/FetchStage.cpp +++ b/llvm/tools/llvm-mca/FetchStage.cpp @@ -17,23 +17,53 @@ namespace mca { -bool FetchStage::hasWorkToComplete() const { return SM.hasNext(); } +bool FetchStage::hasWorkToComplete() const { + return CurrentInstruction.get() || SM.hasNext(); +} + +bool FetchStage::isAvailable(const InstRef & /* unused */) const { + if (!CurrentInstruction) + return false; + assert(SM.hasNext() && "Unexpected internal state!"); + const SourceRef SR = SM.peekNext(); + InstRef IR(SR.first, CurrentInstruction.get()); + return checkNextStage(IR); +} -Stage::Status FetchStage::execute(InstRef &IR) { +llvm::Error FetchStage::getNextInstruction() { + assert(!CurrentInstruction && "There is already an instruction to process!"); if (!SM.hasNext()) - return Stage::Stop; + return llvm::ErrorSuccess(); const SourceRef SR = SM.peekNext(); llvm::Expected> InstOrErr = IB.createInstruction(*SR.second); if (!InstOrErr) return InstOrErr.takeError(); - std::unique_ptr I = std::move(*InstOrErr); - IR = InstRef(SR.first, I.get()); - Instructions[IR.getSourceIndex()] = std::move(I); - return Stage::Continue; + CurrentInstruction = std::move(InstOrErr.get()); + return llvm::ErrorSuccess(); } -void FetchStage::postExecute() { SM.updateNext(); } +llvm::Error FetchStage::execute(InstRef & /*unused */) { + assert(CurrentInstruction && "There is no instruction to process!"); + const SourceRef SR = SM.peekNext(); + InstRef IR(SR.first, 
CurrentInstruction.get()); + assert(checkNextStage(IR) && "Invalid fetch!"); + + Instructions[IR.getSourceIndex()] = std::move(CurrentInstruction); + if (llvm::Error Val = moveToTheNextStage(IR)) + return Val; + + SM.updateNext(); + + // Move the program counter. + return getNextInstruction(); +} + +llvm::Error FetchStage::cycleStart() { + if (!CurrentInstruction && SM.hasNext()) + return getNextInstruction(); + return llvm::ErrorSuccess(); +} llvm::Error FetchStage::cycleEnd() { // Find the first instruction which hasn't been retired. diff --git a/llvm/tools/llvm-mca/FetchStage.h b/llvm/tools/llvm-mca/FetchStage.h index e028cd0..a0ab68a 100644 --- a/llvm/tools/llvm-mca/FetchStage.h +++ b/llvm/tools/llvm-mca/FetchStage.h @@ -24,20 +24,26 @@ namespace mca { class FetchStage final : public Stage { + std::unique_ptr CurrentInstruction; using InstMap = std::map>; InstMap Instructions; InstrBuilder &IB; SourceMgr &SM; + // Updates the program counter, and sets 'CurrentInstruction'. + llvm::Error getNextInstruction(); + FetchStage(const FetchStage &Other) = delete; FetchStage &operator=(const FetchStage &Other) = delete; public: - FetchStage(InstrBuilder &IB, SourceMgr &SM) : IB(IB), SM(SM) {} + FetchStage(InstrBuilder &IB, SourceMgr &SM) + : CurrentInstruction(), IB(IB), SM(SM) {} + bool isAvailable(const InstRef &IR) const override; bool hasWorkToComplete() const override; - Status execute(InstRef &IR) override; - void postExecute() override; + llvm::Error execute(InstRef &IR) override; + llvm::Error cycleStart() override; llvm::Error cycleEnd() override; }; diff --git a/llvm/tools/llvm-mca/InstructionTables.cpp b/llvm/tools/llvm-mca/InstructionTables.cpp index c786cfb..be948d8 100644 --- a/llvm/tools/llvm-mca/InstructionTables.cpp +++ b/llvm/tools/llvm-mca/InstructionTables.cpp @@ -21,7 +21,7 @@ namespace mca { using namespace llvm; -Stage::Status InstructionTables::execute(InstRef &IR) { +Error InstructionTables::execute(InstRef &IR) { ArrayRef Masks = IB.getProcResourceMasks(); const InstrDesc &Desc = IR.getInstruction()->getDesc(); UsedResources.clear(); @@ -64,7 +64,7 @@ Stage::Status InstructionTables::execute(InstRef &IR) { // Send a fake instruction issued event to all the views. HWInstructionIssuedEvent Event(IR, UsedResources); notifyEvent(Event); - return Stage::Continue; + return ErrorSuccess(); } } // namespace mca diff --git a/llvm/tools/llvm-mca/InstructionTables.h b/llvm/tools/llvm-mca/InstructionTables.h index 01ece0c..4acc672 100644 --- a/llvm/tools/llvm-mca/InstructionTables.h +++ b/llvm/tools/llvm-mca/InstructionTables.h @@ -26,7 +26,7 @@ namespace mca { -class InstructionTables : public Stage { +class InstructionTables final : public Stage { const llvm::MCSchedModel &SM; InstrBuilder &IB; llvm::SmallVector, 4> UsedResources; @@ -35,8 +35,8 @@ public: InstructionTables(const llvm::MCSchedModel &Model, InstrBuilder &Builder) : Stage(), SM(Model), IB(Builder) {} - bool hasWorkToComplete() const override final { return false; } - Status execute(InstRef &IR) override final; + bool hasWorkToComplete() const override { return false; } + llvm::Error execute(InstRef &IR) override; }; } // namespace mca diff --git a/llvm/tools/llvm-mca/Pipeline.cpp b/llvm/tools/llvm-mca/Pipeline.cpp index 91c7a5d..a67ae98 100644 --- a/llvm/tools/llvm-mca/Pipeline.cpp +++ b/llvm/tools/llvm-mca/Pipeline.cpp @@ -37,29 +37,6 @@ bool Pipeline::hasWorkToProcess() { }); } -// This routine returns early if any stage returns 'false' after execute() is -// called on it. 
-Stage::Status Pipeline::executeStages(InstRef &IR) { - for (const std::unique_ptr &S : Stages) { - Stage::Status StatusOrErr = S->execute(IR); - if (!StatusOrErr) - return StatusOrErr.takeError(); - else if (StatusOrErr.get() == Stage::Stop) - return Stage::Stop; - } - return Stage::Continue; -} - -void Pipeline::preExecuteStages() { - for (const std::unique_ptr &S : Stages) - S->preExecute(); -} - -void Pipeline::postExecuteStages() { - for (const std::unique_ptr &S : Stages) - S->postExecute(); -} - llvm::Error Pipeline::run() { assert(!Stages.empty() && "Unexpected empty pipeline found!"); @@ -74,36 +51,38 @@ llvm::Error Pipeline::run() { } llvm::Error Pipeline::runCycle() { - // Update stages before we start processing new instructions. llvm::Error Err = llvm::ErrorSuccess(); - for (auto I = Stages.begin(), E = Stages.end(); I != E && !Err; ++I) { + // Update stages before we start processing new instructions. + for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { const std::unique_ptr &S = *I; Err = S->cycleStart(); } - if (Err) - return Err; - // Now fetch and execute new instructions. InstRef IR; - while (true) { - preExecuteStages(); - Stage::Status Val = executeStages(IR); - if (!Val) - return Val.takeError(); - if (Val.get() == Stage::Stop) - break; - postExecuteStages(); - } + Stage &FirstStage = *Stages[0]; + while (!Err && FirstStage.isAvailable(IR)) + Err = FirstStage.execute(IR); // Update stages in preparation for a new cycle. - for (auto I = Stages.begin(), E = Stages.end(); I != E && !Err; ++I) { + for (auto I = Stages.rbegin(), E = Stages.rend(); I != E && !Err; ++I) { const std::unique_ptr &S = *I; Err = S->cycleEnd(); } + return Err; } +void Pipeline::appendStage(std::unique_ptr S) { + assert(S && "Invalid null stage in input!"); + if (!Stages.empty()) { + Stage *Last = Stages.back().get(); + Last->setNextInSequence(S.get()); + } + + Stages.push_back(std::move(S)); +} + void Pipeline::notifyCycleBegin() { LLVM_DEBUG(dbgs() << "[E] Cycle begin: " << Cycles << '\n'); for (HWEventListener *Listener : Listeners) diff --git a/llvm/tools/llvm-mca/Pipeline.h b/llvm/tools/llvm-mca/Pipeline.h index 5cf1afb..dfc3fc1 100644 --- a/llvm/tools/llvm-mca/Pipeline.h +++ b/llvm/tools/llvm-mca/Pipeline.h @@ -60,18 +60,14 @@ class Pipeline { std::set Listeners; unsigned Cycles; - void preExecuteStages(); - Stage::Status executeStages(InstRef &IR); - void postExecuteStages(); llvm::Error runCycle(); - bool hasWorkToProcess(); void notifyCycleBegin(); void notifyCycleEnd(); public: Pipeline() : Cycles(0) {} - void appendStage(std::unique_ptr S) { Stages.push_back(std::move(S)); } + void appendStage(std::unique_ptr S); llvm::Error run(); void addEventListener(HWEventListener *Listener); }; diff --git a/llvm/tools/llvm-mca/RetireStage.cpp b/llvm/tools/llvm-mca/RetireStage.cpp index 43e3364..4e0f380 100644 --- a/llvm/tools/llvm-mca/RetireStage.cpp +++ b/llvm/tools/llvm-mca/RetireStage.cpp @@ -42,6 +42,11 @@ llvm::Error RetireStage::cycleStart() { return llvm::ErrorSuccess(); } +llvm::Error RetireStage::execute(InstRef &IR) { + RCU.onInstructionExecuted(IR.getInstruction()->getRCUTokenID()); + return llvm::ErrorSuccess(); +} + void RetireStage::notifyInstructionRetired(const InstRef &IR) { LLVM_DEBUG(llvm::dbgs() << "[E] Instruction Retired: #" << IR << '\n'); llvm::SmallVector FreedRegs(PRF.getNumRegisterFiles()); diff --git a/llvm/tools/llvm-mca/RetireStage.h b/llvm/tools/llvm-mca/RetireStage.h index 36af447..b4432ca 100644 --- a/llvm/tools/llvm-mca/RetireStage.h +++ 
b/llvm/tools/llvm-mca/RetireStage.h @@ -37,9 +37,8 @@ public: bool hasWorkToComplete() const override { return !RCU.isEmpty(); } llvm::Error cycleStart() override; - Status execute(InstRef &IR) override { return Stage::Continue; } + llvm::Error execute(InstRef &IR) override; void notifyInstructionRetired(const InstRef &IR); - void onInstructionExecuted(unsigned TokenID); }; } // namespace mca diff --git a/llvm/tools/llvm-mca/Stage.h b/llvm/tools/llvm-mca/Stage.h index c9ca1b2..69efb26 100644 --- a/llvm/tools/llvm-mca/Stage.h +++ b/llvm/tools/llvm-mca/Stage.h @@ -25,35 +25,23 @@ namespace mca { class InstRef; class Stage { + Stage *NextInSequence; std::set Listeners; Stage(const Stage &Other) = delete; Stage &operator=(const Stage &Other) = delete; -public: - /// A Stage's execute() returns Continue, Stop, or an error. Returning - /// Continue means that the stage successfully completed its 'execute' - /// action, and that the instruction being processed can be moved to the next - /// pipeline stage during this cycle. Continue allows the pipeline to - /// continue calling 'execute' on subsequent stages. Returning Stop - /// signifies that the stage ran into an error, and tells the pipeline to stop - /// passing the instruction to subsequent stages during this cycle. Any - /// failures that occur during 'execute' are represented by the error variant - /// that is provided by the Expected template. - enum State { Stop, Continue }; - using Status = llvm::Expected; - protected: const std::set &getListeners() const { return Listeners; } public: - Stage() {} + Stage() : NextInSequence(nullptr) {} virtual ~Stage(); - /// Called prior to preExecute to ensure that the stage has items that it - /// is to process. For example, a FetchStage might have more instructions - /// that need to be processed, or a RCU might have items that have yet to - /// retire. + /// Returns true if it can execute IR during this cycle. + virtual bool isAvailable(const InstRef &IR) const { return true; } + + /// Returns true if some instructions are still executing this stage. virtual bool hasWorkToComplete() const = 0; /// Called once at the start of each cycle. This can be used as a setup @@ -63,19 +51,26 @@ public: /// Called once at the end of each cycle. virtual llvm::Error cycleEnd() { return llvm::ErrorSuccess(); } - /// Called prior to executing the list of stages. - /// This can be called multiple times per cycle. - virtual void preExecute() {} + /// The primary action that this stage performs on instruction IR. + virtual llvm::Error execute(InstRef &IR) = 0; - /// Called as a cleanup and finalization phase after each execution. - /// This will only be called if all stages return a success from their - /// execute callback. This can be called multiple times per cycle. - virtual void postExecute() {} + void setNextInSequence(Stage *NextStage) { + assert(!NextInSequence && "This stage already has a NextInSequence!"); + NextInSequence = NextStage; + } - /// The primary action that this stage performs. - /// Returning false prevents successor stages from having their 'execute' - /// routine called. This can be called multiple times during a single cycle. - virtual Status execute(InstRef &IR) = 0; + bool checkNextStage(const InstRef &IR) const { + return NextInSequence && NextInSequence->isAvailable(IR); + } + + /// Called when an instruction is ready to move the next pipeline stage. + /// + /// Stages are responsible for moving instructions to their immediate + /// successor stages. 
+ llvm::Error moveToTheNextStage(InstRef &IR) { + assert(checkNextStage(IR) && "Next stage is not ready!"); + return std::move(NextInSequence->execute(IR)); + } /// Add a listener to receive callbacks during the execution of this stage. void addListener(HWEventListener *Listener); -- 2.7.4