From: Andrew Trick Date: Mon, 15 Oct 2012 18:02:27 +0000 (+0000) Subject: misched: ILP scheduler for experimental heuristics. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=90f711da9aacbbdeda71fb5f4eed5b4fe46d4cc6;p=platform%2Fupstream%2Fllvm.git misched: ILP scheduler for experimental heuristics. llvm-svn: 165950 --- diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index 93990e1..2b96c7a 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -110,6 +110,10 @@ public: /// Initialize the strategy after building the DAG for a new region. virtual void initialize(ScheduleDAGMI *DAG) = 0; + /// Notify this strategy that all roots have been released (including those + /// that depend on EntrySU or ExitSU). + virtual void registerRoots() {} + /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to /// schedule the node at the top of the unscheduled region. Otherwise it will /// be scheduled at the bottom. diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGILP.h b/llvm/include/llvm/CodeGen/ScheduleDAGILP.h new file mode 100644 index 0000000..1aa4058 --- /dev/null +++ b/llvm/include/llvm/CodeGen/ScheduleDAGILP.h @@ -0,0 +1,86 @@ +//===- ScheduleDAGILP.h - ILP metric for ScheduleDAGInstrs ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Definition of an ILP metric for machine level instruction scheduling. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SCHEDULEDAGILP_H +#define LLVM_CODEGEN_SCHEDULEDAGILP_H + +#include "llvm/Support/DataTypes.h" +#include + +namespace llvm { + +class raw_ostream; +class ScheduleDAGInstrs; +class SUnit; + +/// \brief Represent the ILP of the subDAG rooted at a DAG node. +struct ILPValue { + unsigned InstrCount; + unsigned Cycles; + + ILPValue(): InstrCount(0), Cycles(0) {} + + ILPValue(unsigned count, unsigned cycles): + InstrCount(count), Cycles(cycles) {} + + bool isValid() const { return Cycles > 0; } + + // Order by the ILP metric's value. + bool operator<(ILPValue RHS) const { + return (uint64_t)InstrCount * RHS.Cycles + < (uint64_t)Cycles * RHS.InstrCount; + } + bool operator>(ILPValue RHS) const { + return RHS < *this; + } + bool operator<=(ILPValue RHS) const { + return (uint64_t)InstrCount * RHS.Cycles + <= (uint64_t)Cycles * RHS.InstrCount; + } + bool operator>=(ILPValue RHS) const { + return RHS <= *this; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void print(raw_ostream &OS) const; + + void dump() const; +#endif +}; + +/// \brief Compute the values of each DAG node for an ILP metric. +/// +/// This metric assumes that the DAG is a forest of trees with roots at the +/// bottom of the schedule. +class ScheduleDAGILP { + bool IsBottomUp; + std::vector ILPValues; + +public: + ScheduleDAGILP(bool IsBU): IsBottomUp(IsBU) {} + + /// \brief Initialize the result data with the size of the DAG. + void resize(unsigned NumSUnits); + + /// \brief Compute the ILP metric for the subDAG at this root. + void computeILP(const SUnit *Root); + + /// \brief Get the ILP value for a DAG node. + ILPValue getILP(const SUnit *SU); +}; + +raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val); + +} // namespace llvm + +#endif diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 11a7d47..74190e9 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/ScheduleDAGILP.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Support/CommandLine.h" @@ -451,26 +452,6 @@ updateScheduledPressure(std::vector NewMaxPressure) { } } -// Release all DAG roots for scheduling. -void ScheduleDAGMI::releaseRoots() { - SmallVector BotRoots; - - for (std::vector::iterator - I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { - // A SUnit is ready to top schedule if it has no predecessors. - if (I->Preds.empty()) - SchedImpl->releaseTopNode(&(*I)); - // A SUnit is ready to bottom schedule if it has no successors. - if (I->Succs.empty()) - BotRoots.push_back(&(*I)); - } - // Release bottom roots in reverse order so the higher priority nodes appear - // first. This is more natural and slightly more efficient. - for (SmallVectorImpl::const_reverse_iterator - I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) - SchedImpl->releaseBottomNode(*I); -} - /// schedule - Called back from MachineScheduler::runOnMachineFunction /// after setting up the current scheduling region. [RegionBegin, RegionEnd) /// only includes instructions that have DAG nodes, not scheduling boundaries. @@ -532,8 +513,29 @@ void ScheduleDAGMI::postprocessDAG() { } } +// Release all DAG roots for scheduling. +void ScheduleDAGMI::releaseRoots() { + SmallVector BotRoots; + + for (std::vector::iterator + I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { + // A SUnit is ready to top schedule if it has no predecessors. + if (I->Preds.empty()) + SchedImpl->releaseTopNode(&(*I)); + // A SUnit is ready to bottom schedule if it has no successors. + if (I->Succs.empty()) + BotRoots.push_back(&(*I)); + } + // Release bottom roots in reverse order so the higher priority nodes appear + // first. This is more natural and slightly more efficient. + for (SmallVectorImpl::const_reverse_iterator + I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) + SchedImpl->releaseBottomNode(*I); +} + /// Identify DAG roots and setup scheduler queues. void ScheduleDAGMI::initQueues() { + // Initialize the strategy before modifying the DAG. SchedImpl->initialize(this); @@ -544,6 +546,8 @@ void ScheduleDAGMI::initQueues() { // Release all DAG roots for scheduling. releaseRoots(); + SchedImpl->registerRoots(); + CurrentTop = nextIfDebug(RegionBegin, RegionEnd); CurrentBottom = RegionEnd; } @@ -1198,6 +1202,86 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched); //===----------------------------------------------------------------------===// +// ILP Scheduler. Currently for experimental analysis of heuristics. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Order nodes by the ILP metric. +struct ILPOrder { + ScheduleDAGILP *ILP; + bool MaximizeILP; + + ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {} + + /// \brief Apply a less-than relation on node priority. + bool operator()(const SUnit *A, const SUnit *B) const { + // Return true if A comes after B in the Q. + if (MaximizeILP) + return ILP->getILP(A) < ILP->getILP(B); + else + return ILP->getILP(A) > ILP->getILP(B); + } +}; + +/// \brief Schedule based on the ILP metric. +class ILPScheduler : public MachineSchedStrategy { + ScheduleDAGILP ILP; + ILPOrder Cmp; + + std::vector ReadyQ; +public: + ILPScheduler(bool MaximizeILP) + : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {} + + virtual void initialize(ScheduleDAGMI *DAG) { + ReadyQ.clear(); + ILP.resize(DAG->SUnits.size()); + } + + virtual void registerRoots() { + for (std::vector::const_iterator + I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) { + ILP.computeILP(*I); + } + } + + /// Implement MachineSchedStrategy interface. + /// ----------------------------------------- + + virtual SUnit *pickNode(bool &IsTopNode) { + if (ReadyQ.empty()) return NULL; + pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + SUnit *SU = ReadyQ.back(); + ReadyQ.pop_back(); + IsTopNode = false; + DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr() + << " ILP: " << ILP.getILP(SU) << '\n'); + return SU; + } + + virtual void schedNode(SUnit *, bool) {} + + virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } + + virtual void releaseBottomNode(SUnit *SU) { + ReadyQ.push_back(SU); + std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + } +}; +} // namespace + +static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new ILPScheduler(true)); +} +static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new ILPScheduler(false)); +} +static MachineSchedRegistry ILPMaxRegistry( + "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); +static MachineSchedRegistry ILPMinRegistry( + "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); + +//===----------------------------------------------------------------------===// // Machine Instruction Shuffler for Correctness Testing //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index aa45a68..8dcbf83 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDAGILP.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" @@ -30,6 +31,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallPtrSet.h" @@ -933,3 +935,94 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { std::string ScheduleDAGInstrs::getDAGName() const { return "dag." + BB->getFullName(); } + +namespace { +/// \brief Manage the stack used by a reverse depth-first search over the DAG. +class SchedDAGReverseDFS { + std::vector > DFSStack; +public: + bool isComplete() const { return DFSStack.empty(); } + + void follow(const SUnit *SU) { + DFSStack.push_back(std::make_pair(SU, SU->Preds.begin())); + } + void advance() { ++DFSStack.back().second; } + + void backtrack() { DFSStack.pop_back(); } + + const SUnit *getCurr() const { return DFSStack.back().first; } + + SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; } + + SUnit::const_pred_iterator getPredEnd() const { + return getCurr()->Preds.end(); + } +}; +} // anonymous + +void ScheduleDAGILP::resize(unsigned NumSUnits) { + ILPValues.resize(NumSUnits); +} + +ILPValue ScheduleDAGILP::getILP(const SUnit *SU) { + return ILPValues[SU->NodeNum]; +} + +// A leaf node has an ILP of 1/1. +static ILPValue initILP(const SUnit *SU) { + unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1; + return ILPValue(Cnt, 1 + SU->getDepth()); +} + +/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first +/// search from this root. +void ScheduleDAGILP::computeILP(const SUnit *Root) { + if (!IsBottomUp) + llvm_unreachable("Top-down ILP metric is unimplemnted"); + + SchedDAGReverseDFS DFS; + // Mark a node visited by validating it. + ILPValues[Root->NodeNum] = initILP(Root); + DFS.follow(Root); + for (;;) { + // Traverse the leftmost path as far as possible. + while (DFS.getPred() != DFS.getPredEnd()) { + const SUnit *PredSU = DFS.getPred()->getSUnit(); + DFS.advance(); + // If the pred is already valid, skip it. + if (ILPValues[PredSU->NodeNum].isValid()) + continue; + ILPValues[PredSU->NodeNum] = initILP(PredSU); + DFS.follow(PredSU); + } + // Visit the top of the stack in postorder and backtrack. + unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount; + DFS.backtrack(); + if (DFS.isComplete()) + break; + // Add the recently finished predecessor's bottom-up descendent count. + ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ILPValue::print(raw_ostream &OS) const { + if (!isValid()) + OS << "BADILP"; + OS << InstrCount << " / " << Cycles << " = " + << format("%g", ((double)InstrCount / Cycles)); +} + +void ILPValue::dump() const { + dbgs() << *this << '\n'; +} + +namespace llvm { + +raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { + Val.print(OS); + return OS; +} + +} // namespace llvm +#endif // !NDEBUG || LLVM_ENABLE_DUMP diff --git a/llvm/test/CodeGen/X86/misched-ilp.ll b/llvm/test/CodeGen/X86/misched-ilp.ll new file mode 100644 index 0000000..98c7538 --- /dev/null +++ b/llvm/test/CodeGen/X86/misched-ilp.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=x86-64 -mcpu=core2 -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s +; RUN: llc < %s -march=x86-64 -mcpu=core2 -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s +; +; Basic verification of the ScheduleDAGILP metric. +; +; MAX: addss +; MAX: addss +; MAX: addss +; MAX: subss +; MAX: addss +; +; MIN: addss +; MIN: addss +; MIN: subss +; MIN: addss +; MIN: addss +define float @ilpsched(float %a, float %b, float %c, float %d, float %e, float %f) nounwind uwtable readnone ssp { +entry: + %add = fadd float %a, %b + %add1 = fadd float %c, %d + %add2 = fadd float %e, %f + %add3 = fsub float %add1, %add2 + %add4 = fadd float %add, %add3 + ret float %add4 +}