From: Krzysztof Parzyszek Date: Fri, 15 Jul 2016 19:09:37 +0000 (+0000) Subject: [Hexagon] Replace postprocessDAG with a more elaborate DAG mutation X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=36b0f93294f5c620a6fc58f42c7e7af1f762dac5;p=platform%2Fupstream%2Fllvm.git [Hexagon] Replace postprocessDAG with a more elaborate DAG mutation llvm-svn: 275598 --- diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp index c3fca85..0cc0293 100644 --- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -18,25 +18,93 @@ #include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/IR/Function.h" +static cl::opt SchedPredsCloser("sched-preds-closer", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +static cl::opt SchedRetvalOptimization("sched-retval-optimization", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + using namespace llvm; #define DEBUG_TYPE "misched" -/// Platform-specific modifications to DAG. -void VLIWMachineScheduler::postprocessDAG() { +class HexagonCallMutation : public ScheduleDAGMutation { +public: + void apply(ScheduleDAGInstrs *DAG) override; +private: + bool shouldTFRICallBind(const HexagonInstrInfo &HII, + const SUnit &Inst1, const SUnit &Inst2) const; +}; + +// Check if a call and subsequent A2_tfrpi instructions should maintain +// scheduling affinity. We are looking for the TFRI to be consumed in +// the next instruction. This should help reduce the instances of +// double register pairs being allocated and scheduled before a call +// when not used until after the call. This situation is exacerbated +// by the fact that we allocate the pair from the callee saves list, +// leading to excess spills and restores. +bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII, + const SUnit &Inst1, const SUnit &Inst2) const { + if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi) + return false; + + // TypeXTYPE are 64 bit operations. + if (HII.getType(Inst2.getInstr()) == HexagonII::TypeXTYPE) + return true; + return false; +} + +void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) { SUnit* LastSequentialCall = nullptr; + unsigned VRegHoldingRet = 0; + unsigned RetRegister; + SUnit* LastUseOfRet = nullptr; + auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo(); + auto &HII = *DAG->MF.getSubtarget().getInstrInfo(); + // Currently we only catch the situation when compare gets scheduled // before preceding call. - for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) { // Remember the call. - if (SUnits[su].getInstr()->isCall()) - LastSequentialCall = &(SUnits[su]); + if (DAG->SUnits[su].getInstr()->isCall()) + LastSequentialCall = &DAG->SUnits[su]; // Look for a compare that defines a predicate. - else if (SUnits[su].getInstr()->isCompare() && LastSequentialCall) - SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); + else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall) + DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); + // Look for call and tfri* instructions. + else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 && + shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1])) + DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier)); + // Prevent redundant register copies between two calls, which are caused by + // both the return value and the argument for the next call being in %R0. + // Example: + // 1: + // 2: %VregX = COPY %R0 + // 3: + // 4: %R0 = ... + // 5: + // The scheduler would often swap 3 and 4, so an additional register is + // needed. This code inserts a Barrier dependence between 3 & 4 to prevent + // this. The same applies for %D0 and %V0/%W0, which are also handled. + else if (SchedRetvalOptimization) { + const MachineInstr *MI = DAG->SUnits[su].getInstr(); + if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) || + MI->readsRegister(Hexagon::V0, &TRI))) { + // %vregX = COPY %R0 + VRegHoldingRet = MI->getOperand(0).getReg(); + RetRegister = MI->getOperand(1).getReg(); + LastUseOfRet = nullptr; + } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet)) + // + LastUseOfRet = &DAG->SUnits[su]; + else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI)) + // %R0 = ... + DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier)); + } } } + /// Check if scheduling of this SU is possible /// in the current packet. /// It is _not_ precise (statefull), it is more like @@ -152,9 +220,6 @@ void VLIWMachineScheduler::schedule() { buildDAGWithRegPressure(); - // Postprocess the DAG to add platform-specific artificial dependencies. - postprocessDAG(); - SmallVector TopRoots, BotRoots; findRootsAndBiasEdges(TopRoots, BotRoots); @@ -227,6 +292,7 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { "-misched-topdown incompatible with -misched-bottomup"); DAG->addMutation(make_unique()); + DAG->addMutation(make_unique()); } void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {