From 115efcd3d12e3617d0a2ee02499b379c67c2c5cb Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Mon, 28 Nov 2016 20:11:54 +0000 Subject: [PATCH] MachineScheduler: Export function to construct "default" scheduler. This makes the createGenericSchedLive() function that constructs the default scheduler available in the public API. This should help when you want to get a scheduler and the default list of DAG mutations. This also shrinks the list of default DAG mutations: {Load|Store}ClusterDAGMutation and MacroFusionDAGMutation are no longer added by default. Targets can easily add them if they need them. It also makes it easier for targets to add alternative/custom macrofusion or clustering mutations while staying with the default createGenericSchedLive(). It also removes the round trip through the TargetInstrInfo::enableClusterLoads()/enableClusterStores() callbacks. Differential Revision: https://reviews.llvm.org/D26986 llvm-svn: 288057 --- llvm/include/llvm/CodeGen/MachineScheduler.h | 15 ++++++++--- llvm/include/llvm/Target/TargetInstrInfo.h | 22 +++++++++------- llvm/lib/CodeGen/MachineScheduler.cpp | 32 +++++++++--------------- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 4 +-- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 8 ++---- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 10 ++++++++ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp | 8 ------ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h | 3 --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 18 +++++++------ llvm/lib/Target/X86/X86InstrInfo.cpp | 4 +-- llvm/lib/Target/X86/X86InstrInfo.h | 4 +-- llvm/lib/Target/X86/X86TargetMachine.cpp | 8 ++++++ 12 files changed, 73 insertions(+), 63 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index 8dec227..81b8741 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -42,8 +42,8 @@ // // ScheduleDAGInstrs *PassConfig:: // 
createMachineScheduler(MachineSchedContext *C) { -// ScheduleDAGMI *DAG = new ScheduleDAGMI(C, CustomStrategy(C)); -// DAG->addMutation(new CustomDependencies(DAG->TII, DAG->TRI)); +// ScheduleDAGMI *DAG = createGenericSchedLive(C); +// DAG->addMutation(new CustomDAGMutation(...)); // return DAG; // } // @@ -295,7 +295,8 @@ public: /// /// ScheduleDAGMI takes ownership of the Mutation object. void addMutation(std::unique_ptr Mutation) { - Mutations.push_back(std::move(Mutation)); + if (Mutation) + Mutations.push_back(std::move(Mutation)); } /// \brief True if an edge can be added from PredSU to SuccSU without creating @@ -1015,6 +1016,14 @@ protected: void pickNodeFromQueue(SchedCandidate &Cand); }; +/// Create the standard converging machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +/// Adds default DAG mutations. +ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C); + +/// Create a generic scheduler with no vreg liveness or DAG mutation passes. +ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C); + std::unique_ptr createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI); diff --git a/llvm/include/llvm/Target/TargetInstrInfo.h b/llvm/include/llvm/Target/TargetInstrInfo.h index fec51a3..83515bc 100644 --- a/llvm/include/llvm/Target/TargetInstrInfo.h +++ b/llvm/include/llvm/Target/TargetInstrInfo.h @@ -1058,21 +1058,25 @@ public: return false; } - virtual bool enableClusterLoads() const { return false; } - - virtual bool enableClusterStores() const { return false; } - + /// Returns true if the two given memory operations should be scheduled + /// adjacent. Note that you have to add: + /// DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + /// or + /// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + /// to TargetPassConfig::createMachineScheduler() to have an effect. 
virtual bool shouldClusterMemOps(MachineInstr &FirstLdSt, MachineInstr &SecondLdSt, unsigned NumLoads) const { - return false; + llvm_unreachable("target did not implement shouldClusterMemOps()"); } /// Can this target fuse the given instructions if they are scheduled - /// adjacent. - virtual bool shouldScheduleAdjacent(MachineInstr &First, - MachineInstr &Second) const { - return false; + /// adjacent. Note that you have to add: + /// DAG.addMutation(createMacroFusionDAGMutation()); + /// to TargetPassConfig::createMachineScheduler() to have an effect. + virtual bool shouldScheduleAdjacent(const MachineInstr &First, + const MachineInstr &Second) const { + llvm_unreachable("target did not implement shouldScheduleAdjacent()"); } /// Reverses the branch condition of the specified condition list, diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 3984a15..7787ba8 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -230,11 +230,6 @@ static cl::opt EnablePostRAMachineSched( cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden); -/// Forward declare the standard machine scheduler. This will be used as the -/// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); -static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C); - /// Decrement this iterator until reaching the top or a non-debug instr. static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, @@ -1451,13 +1446,15 @@ namespace llvm { std::unique_ptr createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - return make_unique(TII, TRI); + return EnableMemOpCluster ? 
make_unique(TII, TRI) + : nullptr; } std::unique_ptr createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - return make_unique(TII, TRI); + return EnableMemOpCluster ? make_unique(TII, TRI) + : nullptr; } } // namespace llvm @@ -1566,7 +1563,7 @@ namespace llvm { std::unique_ptr createMacroFusionDAGMutation(const TargetInstrInfo *TII) { - return make_unique(*TII); + return EnableMacroFusion ? make_unique(*TII) : nullptr; } } // namespace llvm @@ -3156,7 +3153,7 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create the standard converging machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { +ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique(C)); // Register DAG post-processors. // @@ -3164,20 +3161,16 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { // data and pass it to later mutations. Have a single mutation that gathers // the interesting nodes in one pass. 
DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); - if (EnableMemOpCluster) { - if (DAG->TII->enableClusterLoads()) - DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); - if (DAG->TII->enableClusterStores()) - DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); - } - if (EnableMacroFusion) - DAG->addMutation(createMacroFusionDAGMutation(DAG->TII)); return DAG; } +static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) { + return createGenericSchedLive(C); +} + static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", - createGenericSchedLive); + createConveringSched); //===----------------------------------------------------------------------===// // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. @@ -3308,8 +3301,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { Top.bumpNode(SU); } -/// Create a generic scheduler with no vreg liveness or DAG mutation passes. -static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) { +ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { return new ScheduleDAGMI(C, make_unique(C), /*RemoveKillFlags=*/true); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 33e8f23..465137f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1878,8 +1878,8 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, return Offset1 + 1 == Offset2; } -bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First, - MachineInstr &Second) const { +bool AArch64InstrInfo::shouldScheduleAdjacent( + const MachineInstr &First, const MachineInstr &Second) const { if (Subtarget.hasArithmeticBccFusion()) { // Fuse CMN, CMP, TST followed by Bcc. 
unsigned SecondOpcode = Second.getOpcode(); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 149469e..90b2c08 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -133,15 +133,11 @@ public: int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const; - bool enableClusterLoads() const override { return true; } - - bool enableClusterStores() const override { return true; } - bool shouldClusterMemOps(MachineInstr &FirstLdSt, MachineInstr &SecondLdSt, unsigned NumLoads) const override; - bool shouldScheduleAdjacent(MachineInstr &First, - MachineInstr &Second) const override; + bool shouldScheduleAdjacent(const MachineInstr &First, + const MachineInstr &Second) const override; MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var, diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index cdc469c..b84abc7 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -299,6 +300,15 @@ public: return getTM(); } + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMILive *DAG = createGenericSchedLive(C); + DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + DAG->addMutation(createMacroFusionDAGMutation(DAG->TII)); + return DAG; + } + void addIRPasses() override; bool addPreISel() override; bool addInstSelector() 
override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index f88bb69..e4dc659 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -32,14 +32,6 @@ void AMDGPUInstrInfo::anchor() {} AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) : AMDGPUGenInstrInfo(-1, -1), ST(ST) {} -bool AMDGPUInstrInfo::enableClusterLoads() const { - return true; -} - -bool AMDGPUInstrInfo::enableClusterStores() const { - return true; -} - // FIXME: This behaves strangely. If, for example, you have 32 load + stores, // the first 16 loads will be interleaved with the stores, and the next 16 will // be clustered as expected. It should really split into 2 16 store batches. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 46e985d..bd8e389 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -39,9 +39,6 @@ private: public: explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st); - bool enableClusterLoads() const override; - bool enableClusterStores() const override; - bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index baf4d19..7287b56 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -102,14 +102,8 @@ static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique(C)); - - const SIInstrInfo *TII = static_cast(DAG->TII); - if (TII->enableClusterLoads()) - DAG->addMutation(createLoadClusterDAGMutation(TII, DAG->TRI)); - - if (TII->enableClusterStores()) - DAG->addMutation(createStoreClusterDAGMutation(TII, DAG->TRI)); - + 
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); return DAG; } @@ -291,6 +285,14 @@ public: return getTM(); } + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMILive *DAG = createGenericSchedLive(C); + DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + return DAG; + } + void addEarlyCSEOrGVNPass(); void addStraightLineScalarOptimizationPasses(); void addIRPasses() override; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 60df110..eb1cb0c 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -8023,8 +8023,8 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, return true; } -bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr &First, - MachineInstr &Second) const { +bool X86InstrInfo::shouldScheduleAdjacent(const MachineInstr &First, + const MachineInstr &Second) const { // Check if this processor supports macro-fusion. Since this is a minor // heuristic, we haven't specifically reserved a feature. hasAVX is a decent // proxy for SandyBridge+. 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index a0292bb..8d74617 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -443,8 +443,8 @@ public: int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override; - bool shouldScheduleAdjacent(MachineInstr &First, - MachineInstr &Second) const override; + bool shouldScheduleAdjacent(const MachineInstr &First, + const MachineInstr &Second) const override; void getNoopForMachoTarget(MCInst &NopInst) const override; diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index b398702..6eb96a5 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -18,6 +18,7 @@ #include "X86TargetTransformInfo.h" #include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" @@ -284,6 +285,13 @@ public: return getTM(); } + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMILive *DAG = createGenericSchedLive(C); + DAG->addMutation(createMacroFusionDAGMutation(DAG->TII)); + return DAG; + } + void addIRPasses() override; bool addInstSelector() override; #ifdef LLVM_BUILD_GLOBAL_ISEL -- 2.7.4