From: Krzysztof Parzyszek <kparzysz@codeaurora.org>
Date: Fri, 15 Jul 2016 19:09:37 +0000 (+0000)
Subject: [Hexagon] Replace postprocessDAG with a more elaborate DAG mutation
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=36b0f93294f5c620a6fc58f42c7e7af1f762dac5;p=platform%2Fupstream%2Fllvm.git

[Hexagon] Replace postprocessDAG with a more elaborate DAG mutation

llvm-svn: 275598
---

diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index c3fca85..0cc0293 100644
--- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -18,25 +18,93 @@
 #include "llvm/CodeGen/ScheduleDAGMutation.h"
 #include "llvm/IR/Function.h"
 
+static cl::opt<bool> SchedPredsCloser("sched-preds-closer",
+    cl::Hidden, cl::ZeroOrMore, cl::init(true));
+
+static cl::opt<bool> SchedRetvalOptimization("sched-retval-optimization",
+    cl::Hidden, cl::ZeroOrMore, cl::init(true));
+
 using namespace llvm;
 
 #define DEBUG_TYPE "misched"
 
-/// Platform-specific modifications to DAG.
-void VLIWMachineScheduler::postprocessDAG() {
+class HexagonCallMutation : public ScheduleDAGMutation {
+public:
+  void apply(ScheduleDAGInstrs *DAG) override;
+private:
+  bool shouldTFRICallBind(const HexagonInstrInfo &HII,
+                          const SUnit &Inst1, const SUnit &Inst2) const;
+};
+
+// Check if a call and subsequent A2_tfrpi instructions should maintain
+// scheduling affinity. We are looking for the TFRI to be consumed in
+// the next instruction. This should help reduce the instances of
+// double register pairs being allocated and scheduled before a call
+// when not used until after the call. This situation is exacerbated
+// by the fact that we allocate the pair from the callee saves list,
+// leading to excess spills and restores.
+bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII,
+      const SUnit &Inst1, const SUnit &Inst2) const {
+  if (Inst1.getInstr()->getOpcode() != Hexagon::A2_tfrpi)
+    return false;
+
+  // TypeXTYPE are 64 bit operations.
+  if (HII.getType(Inst2.getInstr()) == HexagonII::TypeXTYPE)
+    return true;
+  return false;
+}
+
+void HexagonCallMutation::apply(ScheduleDAGInstrs *DAG) {
   SUnit* LastSequentialCall = nullptr;
+  unsigned VRegHoldingRet = 0;
+  unsigned RetRegister;
+  SUnit* LastUseOfRet = nullptr;
+  auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
+  auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+
   // Currently we only catch the situation when compare gets scheduled
   // before preceding call.
-  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+  for (unsigned su = 0, e = DAG->SUnits.size(); su != e; ++su) {
     // Remember the call.
-    if (SUnits[su].getInstr()->isCall())
-      LastSequentialCall = &(SUnits[su]);
+    if (DAG->SUnits[su].getInstr()->isCall())
+      LastSequentialCall = &DAG->SUnits[su];
     // Look for a compare that defines a predicate.
-    else if (SUnits[su].getInstr()->isCompare() && LastSequentialCall)
-      SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
+    else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
+      DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
+    // Look for call and tfri* instructions.
+    else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
+             shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
+      DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier));
+    // Prevent redundant register copies between two calls, which are caused by
+    // both the return value and the argument for the next call being in %R0.
+    // Example:
+    //   1: <call1>
+    //   2: %VregX = COPY %R0
+    //   3: <use of %VregX>
+    //   4: %R0 = ...
+    //   5: <call2>
+    // The scheduler would often swap 3 and 4, so an additional register is
+    // needed. This code inserts a Barrier dependence between 3 & 4 to prevent
+    // this. The same applies for %D0 and %V0/%W0, which are also handled.
+    else if (SchedRetvalOptimization) {
+      const MachineInstr *MI = DAG->SUnits[su].getInstr();
+      if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) ||
+                           MI->readsRegister(Hexagon::V0, &TRI)))  {
+        // %vregX = COPY %R0
+        VRegHoldingRet = MI->getOperand(0).getReg();
+        RetRegister = MI->getOperand(1).getReg();
+        LastUseOfRet = nullptr;
+      } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet))
+        // <use of %vregX>
+        LastUseOfRet = &DAG->SUnits[su];
+      else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI))
+        // %R0 = ...
+        DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier));
+    }
   }
 }
 
+
 /// Check if scheduling of this SU is possible
 /// in the current packet.
 /// It is _not_ precise (statefull), it is more like
@@ -152,9 +220,6 @@ void VLIWMachineScheduler::schedule() {
 
   buildDAGWithRegPressure();
 
-  // Postprocess the DAG to add platform-specific artificial dependencies.
-  postprocessDAG();
-
   SmallVector<SUnit*, 8> TopRoots, BotRoots;
   findRootsAndBiasEdges(TopRoots, BotRoots);
 
@@ -227,6 +292,7 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
          "-misched-topdown incompatible with -misched-bottomup");
 
   DAG->addMutation(make_unique<HexagonSubtarget::HexagonDAGMutation>());
+  DAG->addMutation(make_unique<HexagonCallMutation>());
 }
 
 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {