From 115efcd3d12e3617d0a2ee02499b379c67c2c5cb Mon Sep 17 00:00:00 2001
From: Matthias Braun <matze@braunis.de>
Date: Mon, 28 Nov 2016 20:11:54 +0000
Subject: [PATCH] MachineScheduler: Export function to construct "default"
 scheduler.

This makes the createGenericSchedLive() function that constructs the
default scheduler available for the public API. This should help when
you want to get a scheduler and the default list of DAG mutations.

This also shrinks the list of default DAG mutations:
{Load|Store}ClusterDAGMutation and MacroFusionDAGMutation are no longer
added by default. Targets can easily add them if they need them. It also
makes it easier for targets to add alternative/custom macrofusion or
clustering mutations while staying with the default
createGenericSchedLive(). It also avoids the round trip through the
TargetInstrInfo::enableClusterLoads()/enableClusterStores() callbacks.

Differential Revision: https://reviews.llvm.org/D26986

llvm-svn: 288057
---
 llvm/include/llvm/CodeGen/MachineScheduler.h     | 15 ++++++++---
 llvm/include/llvm/Target/TargetInstrInfo.h       | 22 +++++++++-------
 llvm/lib/CodeGen/MachineScheduler.cpp            | 32 +++++++++---------------
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp     |  4 +--
 llvm/lib/Target/AArch64/AArch64InstrInfo.h       |  8 ++----
 llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 10 ++++++++
 llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp       |  8 ------
 llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h         |  3 ---
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp   | 18 +++++++------
 llvm/lib/Target/X86/X86InstrInfo.cpp             |  4 +--
 llvm/lib/Target/X86/X86InstrInfo.h               |  4 +--
 llvm/lib/Target/X86/X86TargetMachine.cpp         |  8 ++++++
 12 files changed, 73 insertions(+), 63 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index 8dec227..81b8741 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -42,8 +42,8 @@
 //
 // ScheduleDAGInstrs *<Target>PassConfig::
 // createMachineScheduler(MachineSchedContext *C) {
-//   ScheduleDAGMI *DAG = new ScheduleDAGMI(C, CustomStrategy(C));
-//   DAG->addMutation(new CustomDependencies(DAG->TII, DAG->TRI));
+//   ScheduleDAGMI *DAG = createGenericSchedLive(C);
+//   DAG->addMutation(new CustomDAGMutation(...));
 //   return DAG;
 // }
 //
@@ -295,7 +295,8 @@ public:
   ///
   /// ScheduleDAGMI takes ownership of the Mutation object.
   void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
-    Mutations.push_back(std::move(Mutation));
+    if (Mutation)
+      Mutations.push_back(std::move(Mutation));
   }
 
   /// \brief True if an edge can be added from PredSU to SuccSU without creating
@@ -1015,6 +1016,14 @@ protected:
   void pickNodeFromQueue(SchedCandidate &Cand);
 };
 
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+/// Adds default DAG mutations.
+ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C);
+
+/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
+ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C);
+
 std::unique_ptr<ScheduleDAGMutation>
 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
                              const TargetRegisterInfo *TRI);
diff --git a/llvm/include/llvm/Target/TargetInstrInfo.h b/llvm/include/llvm/Target/TargetInstrInfo.h
index fec51a3..83515bc 100644
--- a/llvm/include/llvm/Target/TargetInstrInfo.h
+++ b/llvm/include/llvm/Target/TargetInstrInfo.h
@@ -1058,21 +1058,25 @@ public:
     return false;
   }
 
-  virtual bool enableClusterLoads() const { return false; }
-
-  virtual bool enableClusterStores() const { return false; }
-
+  /// Returns true if the two given memory operations should be scheduled
+  /// adjacent. Note that you have to add:
+  ///   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+  /// or
+  ///   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+  /// to TargetPassConfig::createMachineScheduler() to have an effect.
   virtual bool shouldClusterMemOps(MachineInstr &FirstLdSt,
                                    MachineInstr &SecondLdSt,
                                    unsigned NumLoads) const {
-    return false;
+    llvm_unreachable("target did not implement shouldClusterMemOps()");
   }
 
   /// Can this target fuse the given instructions if they are scheduled
-  /// adjacent.
-  virtual bool shouldScheduleAdjacent(MachineInstr &First,
-                                      MachineInstr &Second) const {
-    return false;
+  /// adjacent. Note that you have to add:
+  ///   DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
+  /// to TargetPassConfig::createMachineScheduler() to have an effect.
+  virtual bool shouldScheduleAdjacent(const MachineInstr &First,
+                                      const MachineInstr &Second) const {
+    llvm_unreachable("target did not implement shouldScheduleAdjacent()");
   }
 
   /// Reverses the branch condition of the specified condition list,
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 3984a15..7787ba8 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -230,11 +230,6 @@ static cl::opt<bool> EnablePostRAMachineSched(
     cl::desc("Enable the post-ra machine instruction scheduling pass."),
     cl::init(true), cl::Hidden);
 
-/// Forward declare the standard machine scheduler. This will be used as the
-/// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
-static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
-
 /// Decrement this iterator until reaching the top or a non-debug instr.
 static MachineBasicBlock::const_iterator
 priorNonDebug(MachineBasicBlock::const_iterator I,
@@ -1451,13 +1446,15 @@ namespace llvm {
 std::unique_ptr<ScheduleDAGMutation>
 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
                              const TargetRegisterInfo *TRI) {
-  return make_unique<LoadClusterMutation>(TII, TRI);
+  return EnableMemOpCluster ? make_unique<LoadClusterMutation>(TII, TRI)
+                            : nullptr;
 }
 
 std::unique_ptr<ScheduleDAGMutation>
 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
                               const TargetRegisterInfo *TRI) {
-  return make_unique<StoreClusterMutation>(TII, TRI);
+  return EnableMemOpCluster ? make_unique<StoreClusterMutation>(TII, TRI)
+                            : nullptr;
 }
 
 } // namespace llvm
@@ -1566,7 +1563,7 @@ namespace llvm {
 
 std::unique_ptr<ScheduleDAGMutation>
 createMacroFusionDAGMutation(const TargetInstrInfo *TII) {
-  return make_unique<MacroFusion>(*TII);
+  return EnableMacroFusion ? make_unique<MacroFusion>(*TII) : nullptr;
 }
 
 } // namespace llvm
@@ -3156,7 +3153,7 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
 
 /// Create the standard converging machine scheduler. This will be used as the
 /// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
+ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
   ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
   // Register DAG post-processors.
   //
@@ -3164,20 +3161,16 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
   // data and pass it to later mutations. Have a single mutation that gathers
   // the interesting nodes in one pass.
   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
-  if (EnableMemOpCluster) {
-    if (DAG->TII->enableClusterLoads())
-      DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
-    if (DAG->TII->enableClusterStores())
-      DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
-  }
-  if (EnableMacroFusion)
-    DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
   return DAG;
 }
 
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
+  return createGenericSchedLive(C); // Result widens to ScheduleDAGInstrs*.
+}
+
 static MachineSchedRegistry
 GenericSchedRegistry("converge", "Standard converging scheduler.",
-                     createGenericSchedLive);
+                     createConvergingSched);
 
 //===----------------------------------------------------------------------===//
 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
@@ -3308,8 +3301,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   Top.bumpNode(SU);
 }
 
-/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
-static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
+ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
   return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C),
                            /*RemoveKillFlags=*/true);
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 33e8f23..465137f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1878,8 +1878,8 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
   return Offset1 + 1 == Offset2;
 }
 
-bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
-                                              MachineInstr &Second) const {
+bool AArch64InstrInfo::shouldScheduleAdjacent(
+    const MachineInstr &First, const MachineInstr &Second) const {
   if (Subtarget.hasArithmeticBccFusion()) {
     // Fuse CMN, CMP, TST followed by Bcc.
     unsigned SecondOpcode = Second.getOpcode();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 149469e..90b2c08 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -133,15 +133,11 @@ public:
                                   int64_t &Offset, unsigned &Width,
                                   const TargetRegisterInfo *TRI) const;
 
-  bool enableClusterLoads() const override { return true; }
-
-  bool enableClusterStores() const override { return true; }
-
   bool shouldClusterMemOps(MachineInstr &FirstLdSt, MachineInstr &SecondLdSt,
                            unsigned NumLoads) const override;
 
-  bool shouldScheduleAdjacent(MachineInstr &First,
-                              MachineInstr &Second) const override;
+  bool shouldScheduleAdjacent(const MachineInstr &First,
+                              const MachineInstr &Second) const override;
 
   MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
                                          uint64_t Offset, const MDNode *Var,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index cdc469c..b84abc7 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -299,6 +300,15 @@ public:
     return getTM<AArch64TargetMachine>();
   }
 
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+    DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
+    return DAG;
+  }
+
   void addIRPasses()  override;
   bool addPreISel() override;
   bool addInstSelector() override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index f88bb69..e4dc659 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -32,14 +32,6 @@ void AMDGPUInstrInfo::anchor() {}
 AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
   : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
 
-bool AMDGPUInstrInfo::enableClusterLoads() const {
-  return true;
-}
-
-bool AMDGPUInstrInfo::enableClusterStores() const {
-  return true;
-}
-
 // FIXME: This behaves strangely. If, for example, you have 32 load + stores,
 // the first 16 loads will be interleaved with the stores, and the next 16 will
 // be clustered as expected. It should really split into 2 16 store batches.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 46e985d..bd8e389 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -39,9 +39,6 @@ private:
 public:
   explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
 
-  bool enableClusterLoads() const override;
-  bool enableClusterStores() const override;
-
   bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                int64_t Offset1, int64_t Offset2,
                                unsigned NumLoads) const override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index baf4d19..7287b56 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -102,14 +102,8 @@ static ScheduleDAGInstrs *
 createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
   ScheduleDAGMILive *DAG =
       new ScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
-
-  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
-  if (TII->enableClusterLoads())
-    DAG->addMutation(createLoadClusterDAGMutation(TII, DAG->TRI));
-
-  if (TII->enableClusterStores())
-    DAG->addMutation(createStoreClusterDAGMutation(TII, DAG->TRI));
-
+  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
   return DAG;
 }
 
@@ -291,6 +285,14 @@ public:
     return getTM<AMDGPUTargetMachine>();
   }
 
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+    return DAG;
+  }
+
   void addEarlyCSEOrGVNPass();
   void addStraightLineScalarOptimizationPasses();
   void addIRPasses() override;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 60df110..eb1cb0c 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8023,8 +8023,8 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
   return true;
 }
 
-bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
-                                          MachineInstr &Second) const {
+bool X86InstrInfo::shouldScheduleAdjacent(const MachineInstr &First,
+                                          const MachineInstr &Second) const {
   // Check if this processor supports macro-fusion. Since this is a minor
   // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
   // proxy for SandyBridge+.
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index a0292bb..8d74617 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -443,8 +443,8 @@ public:
                                int64_t Offset1, int64_t Offset2,
                                unsigned NumLoads) const override;
 
-  bool shouldScheduleAdjacent(MachineInstr &First,
-                              MachineInstr &Second) const override;
+  bool shouldScheduleAdjacent(const MachineInstr &First,
+                              const MachineInstr &Second) const override;
 
   void getNoopForMachoTarget(MCInst &NopInst) const override;
 
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index b398702..6eb96a5 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -18,6 +18,7 @@
 #include "X86TargetTransformInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Function.h"
@@ -284,6 +285,13 @@ public:
     return getTM<X86TargetMachine>();
   }
 
+  ScheduleDAGInstrs *
+  createMachineScheduler(MachineSchedContext *C) const override {
+    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+    DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
+    return DAG;
+  }
+
   void addIRPasses() override;
   bool addInstSelector() override;
 #ifdef LLVM_BUILD_GLOBAL_ISEL
-- 
2.7.4